Table of contents
1 Setup
Date=Sys.Date()[1]
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018c.
## 1.0/zoneinfo/Asia/Shanghai'
print(paste0("Document was last updated at ", Date, "."))
## [1] "Document was last updated at 2018-07-02."
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3
source("manhattan.R")
plink='/Users/gc5k/bin/plink_mac/plink'
dat="Clean_Merge_Final_Unrelated_NoOutliers_NoDiffSNPs_NoDuplicates"
nchr=22
PC=5
gcMat=matrix(0, PC, 2)
2 QC
maf=paste0(plink, " --bfile ", dat, " --freq --out ", dat)
system(maf)
mafRes=read.table(paste0(dat, ".frq"), as.is = T, header = T)
hist(mafRes$MAF, main="MAF", xlab="MAF")

het=paste0(plink, " --bfile ", dat, " --het --out ", dat)
system(het)
hetRes=read.table(paste0(dat, ".het"), as.is = T, header = T)
hist(hetRes$F, main="Het", xlab="Het", breaks = 25)

hardy=paste0(plink, " --bfile ", dat, " --hardy --out ", dat)
system(hardy)
hardyRes=read.table(paste0(dat, ".hwe"), as.is = T, header = T)
hardyRes=hardyRes[hardyRes$TEST=="ALL",]
bim=read.table(paste0(dat, ".bim"), as.is = T)
hardyRes$BP=bim[,4]
hardyResS=hardyRes[order(hardyRes$P),]
manhattan(hardyResS, pch=16, cex=0.5, bty='l')

kable(hardyResS[1:10,])
| 1431598 |
11 |
rs308351 |
ALL |
T |
C |
908/1795/1375 |
0.44020 |
0.49340 |
0 |
67731956 |
| 152188 |
1 |
rs3219104 |
ALL |
T |
G |
789/1764/1517 |
0.43340 |
0.48400 |
0 |
226562621 |
| 2203330 |
22 |
rs4393 |
ALL |
G |
T |
529/1608/1934 |
0.39500 |
0.44040 |
0 |
48672954 |
| 1431586 |
11 |
rs10896215 |
ALL |
C |
T |
644/1697/1724 |
0.41750 |
0.46470 |
0 |
67536948 |
| 1649815 |
13 |
rs12864684 |
ALL |
C |
A |
258/1226/2575 |
0.30200 |
0.33710 |
0 |
85710908 |
| 817807 |
6 |
KGP6523226 |
ALL |
A |
G |
12/104/3969 |
0.02546 |
0.03084 |
0 |
31778307 |
| 121018 |
1 |
rs11810038 |
ALL |
A |
G |
51/472/3545 |
0.11600 |
0.13110 |
0 |
184144995 |
| 820312 |
6 |
rs743862 |
ALL |
C |
T |
107/777/3192 |
0.19060 |
0.21360 |
0 |
32381939 |
| 1303813 |
10 |
KGP2803099 |
ALL |
C |
T |
686/1732/1656 |
0.42510 |
0.47170 |
0 |
42947627 |
| 28519 |
1 |
KGP10221406 |
ALL |
A |
G |
1091/1839/1147 |
0.45110 |
0.49990 |
0 |
31722166 |
3 PCA
if (!file.exists(paste0(dat, ".grm.gz"))) {
grm=paste(plink, " --bfile ", dat, " --autosome-num ", nchr, " --make-grm gz --out ", dat)
system(grm)
}
grmF=read.table(gzfile(paste0(dat,".grm.gz")), as.is=T)
hist(grmF[grmF[,1]!=grmF[,2],4], main="Genetic relatedness", xlab="Genetic relatedness")

if (!file.exists(paste0(dat, ".grm.gz"))) {
pca=paste(plink, " --pca 5 --bfile ", dat, " --allow-no-sex --autosome-num ", nchr, " --out ", dat)
system(pca)
}
gm=read.table(paste0(dat, ".eigenval"), as.is = T, header = F)
gcMat[,1]=gm[1:5, 1]
eve=read.table(paste0(dat, ".eigenvec"), as.is = T, header = F)
plot(eve[,3], eve[,4], xlab="PC 1", ylab="PC 2", main="PCA", bty='l', pch=16, cex=0.5)

4 EigenGWAS
for(i in 1:PC) {
if(!file.exists(paste0(dat, i, ".assoc.linear"))) {
assoc=paste0(plink, " --linear --bfile ", dat, " --pheno ", dat, ".eigenvec ", "--mpheno ", i, "--allow-no-sex --autosome-num ", nchr, " --out ", dat, i)
system(assoc)
}
gwasF=read.table(paste0(dat, i, ".assoc.linear"), as.is = T, header = T)
gc=qchisq(median(gwasF$P, na.rm = T), 1, lower.tail = F)/qchisq(0.5, 1, lower.tail = F) #LambdaGC
gcMat[i,2]=gc
dat1=gwasF
dat1$P=pchisq(gwasF$STAT^2/gc, 1, lower.tail = F)
manhattan(dat1, limitchromosomes = 1:max(dat1$CHR), pch=16, cex=0.5, bty='l')
}





colnames(gcMat)=c("Eigenval", "GC")
kable(gcMat)
| 6.18833 |
5.508873 |
| 4.26989 |
1.514528 |
| 3.47842 |
1.133984 |
| 2.96205 |
1.148394 |
| 2.62383 |
1.078687 |
barplot(t(gcMat), beside = T, border = F)
legend("topright", legend = c("Eigenvalue", expression(paste(lambda[GC]))), pch=15, col=c("black", "grey"), bty='n')
