ALS paper Nat Comm

Table of contents

1 Setup

Date=Sys.Date()[1]
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018c.
## 1.0/zoneinfo/Asia/Shanghai'
print(paste0("Document was last updated at ", Date, "."))
## [1] "Document was last updated at 2018-07-02."
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3
source("manhattan.R")
plink='/Users/gc5k/bin/plink_mac/plink'

dat="Clean_Merge_Final_Unrelated_NoOutliers_NoDiffSNPs_NoDuplicates"

nchr=22

PC=5
gcMat=matrix(0, PC, 2)

2 QC

maf=paste0(plink, " --bfile ", dat, " --freq --out ", dat)
system(maf)
mafRes=read.table(paste0(dat, ".frq"), as.is = T, header = T)
hist(mafRes$MAF, main="MAF", xlab="MAF")

het=paste0(plink, " --bfile ", dat, " --het --out ", dat)
system(het)
hetRes=read.table(paste0(dat, ".het"), as.is = T, header = T)
hist(hetRes$F, main="Het", xlab="Het", breaks = 25)

hardy=paste0(plink, " --bfile ", dat, " --hardy --out ", dat)
system(hardy)
hardyRes=read.table(paste0(dat, ".hwe"), as.is = T, header = T)
hardyRes=hardyRes[hardyRes$TEST=="ALL",]
bim=read.table(paste0(dat, ".bim"), as.is = T)
hardyRes$BP=bim[,4]

hardyResS=hardyRes[order(hardyRes$P),]
manhattan(hardyResS, pch=16, cex=0.5, bty='l')

kable(hardyResS[1:10,])
CHR SNP TEST A1 A2 GENO O.HET. E.HET. P BP
1431598 11 rs308351 ALL T C 908/1795/1375 0.44020 0.49340 0 67731956
152188 1 rs3219104 ALL T G 789/1764/1517 0.43340 0.48400 0 226562621
2203330 22 rs4393 ALL G T 529/1608/1934 0.39500 0.44040 0 48672954
1431586 11 rs10896215 ALL C T 644/1697/1724 0.41750 0.46470 0 67536948
1649815 13 rs12864684 ALL C A 258/1226/2575 0.30200 0.33710 0 85710908
817807 6 KGP6523226 ALL A G 12/104/3969 0.02546 0.03084 0 31778307
121018 1 rs11810038 ALL A G 51/472/3545 0.11600 0.13110 0 184144995
820312 6 rs743862 ALL C T 107/777/3192 0.19060 0.21360 0 32381939
1303813 10 KGP2803099 ALL C T 686/1732/1656 0.42510 0.47170 0 42947627
28519 1 KGP10221406 ALL A G 1091/1839/1147 0.45110 0.49990 0 31722166

3 PCA

if (!file.exists(paste0(dat, ".grm.gz"))) {
  grm=paste(plink, " --bfile ", dat, " --autosome-num ", nchr, " --make-grm gz --out ", dat)
  system(grm)
}
grmF=read.table(gzfile(paste0(dat,".grm.gz")), as.is=T)
hist(grmF[grmF[,1]!=grmF[,2],4], main="Genetic relatedness", xlab="Genetic relatedness")

if (!file.exists(paste0(dat, ".grm.gz"))) {
  pca=paste(plink, " --pca 5 --bfile ", dat, " --allow-no-sex --autosome-num ", nchr, " --out ", dat)
  system(pca)
}

gm=read.table(paste0(dat, ".eigenval"), as.is = T, header = F)
gcMat[,1]=gm[1:5, 1]
eve=read.table(paste0(dat, ".eigenvec"), as.is = T, header = F)
plot(eve[,3], eve[,4], xlab="PC 1", ylab="PC 2", main="PCA", bty='l', pch=16, cex=0.5)

4 EigenGWAS

for(i in 1:PC) {
  if(!file.exists(paste0(dat, i, ".assoc.linear"))) {
    assoc=paste0(plink, " --linear --bfile ", dat, " --pheno ", dat, ".eigenvec ", "--mpheno ", i, "--allow-no-sex --autosome-num ", nchr, " --out ", dat, i)
    system(assoc)
  }

  gwasF=read.table(paste0(dat, i, ".assoc.linear"), as.is = T, header = T)
  gc=qchisq(median(gwasF$P, na.rm = T), 1, lower.tail = F)/qchisq(0.5, 1, lower.tail = F) #LambdaGC
  gcMat[i,2]=gc

  dat1=gwasF
  dat1$P=pchisq(gwasF$STAT^2/gc, 1, lower.tail = F)
  manhattan(dat1, limitchromosomes = 1:max(dat1$CHR), pch=16, cex=0.5, bty='l')
}

colnames(gcMat)=c("Eigenval", "GC")
kable(gcMat)
Eigenval GC
6.18833 5.508873
4.26989 1.514528
3.47842 1.133984
2.96205 1.148394
2.62383 1.078687
barplot(t(gcMat), beside = T, border = F)
legend("topright", legend = c("Eigenvalue", expression(paste(lambda[GC]))), pch=15, col=c("black", "grey"), bty='n')