Some packages.
options(digits = 2)
library(pacman)
p_load(kirkegaard)
Done by system call to PLINK.
#find 23andme files
list_23andme = dir("data/23andme", pattern = "\\.tsv", full.names = T)
#call plink on each
for (f in list_23andme) {
system(sprintf("plink --23file %s --make-bed --out %s",
f,
"data/bed/" + str_filename(f) %>% str_replace("\\.tsv$", "")
)
)
}
We use Okbay et al meta-analyis (education).
#gwas file
gwas_file = "data/GWAS/Okbay_2016_top5000.txt"
#inspect in R too
gwas = read_tsv(gwas_file)
## Parsed with column specification:
## cols(
## MarkerName = col_character(),
## CHR = col_integer(),
## POS = col_integer(),
## A1 = col_character(),
## A2 = col_character(),
## EAF = col_double(),
## Beta = col_double(),
## SE = col_double(),
## Pval = col_double()
## )
#gwas col specification
#these must be the cols that refer to id, allele, effect size
gwas_cols = "1 4 7"
#find persons to score
list_bed = dir("data/bed", pattern = ".bed", full.names = T) %>%
#remove .bed extension
str_replace("\\.bed$", "")
#call plink on each
for (f in list_bed) {
system(sprintf("plink --bfile %s --score %s %s header --out %s",
f,
gwas_file,
gwas_cols,
"data/scored/" + (f %>% str_filename())
)
)
}
Load the scored genomes from PLINK and plot.
#polygenics
list_polygenics = dir("data/scored", pattern = "\\.profile$", full.names = T)