## [1] "Document was last updated at 2018-07-01."

Table of contents

1 Setup

inbred=TRUE
FN="arab"
pheF="arab.phe"
covF="arab.eigenvec"
####################
#arab gwas
####################
plink='/Users/gc5k/bin/plink_mac/plink'
source("manhattan.R")
topN=10

2 PCA

grm=paste(plink, "--make-grm-gz --bfile ", FN, " --out ", FN)
system(grm)
grmRes = read.table(gzfile(paste0(FN, ".grm.gz")), as.is = T)
if(inbred) {
  hist(grmRes[,4]/2, xlab="GRM", main="Genetic relatedness", breaks = 50)
} else {
  hist(grmRes[,4], xlab="GRM", main="Genetic relatedness")
}

pca=paste(plink, "--pca --bfile ", FN, " --out ", FN)
system(pca)
pcaRes=read.table(paste0(FN, ".eigenvec"), as.is = T)
plot(pcaRes[,3], pcaRes[,4], xlab="PC 1", ylab="PC 2", bty='l', pch=16, cex=0.5)

3 Quality control [MAF]

maf=paste(plink, "--bfile", FN, "--freq --out ", FN)
system(maf)
mafRes=read.table(paste0(FN, ".frq"), as.is = T, header = T)
hist(mafRes$MAF, main=paste(FN, "MAF"), xlab = "MAF", breaks = 25)

4 Quality control [HET]

het=paste(plink, "--bfile", FN, "--het --out ", FN)
system(het)
hetRes=read.table(paste0(FN, ".het"), as.is = T, header = T)
hist(hetRes$F, main=paste(FN, "HET"), xlab = "HET", breaks = 25)

5 Run GWAS [Trait 1]

for(i in 1:1)
{
  out0=paste0("arab.pheno-", i)
  gwas=paste(plink, " --linear --bfile ", FN,  " --pheno ", pheF, " --mpheno ", i, " --out ", out0)
  system(gwas)
  gwas_e0=read.table(paste0(out0, ".assoc.linear"), as.is = T, header = T)

  layout(matrix(c(1,2,3,3), 2, 2, byrow = F))
  #fig1
  qqplot(main=paste("QQ plot for phenotype", i), pch=16, cex=0.5, bty="l", xlab=expression(paste("Theoretical ", chi[1]^2)), ylab=expression(paste("Observed ", chi[1]^2)), rchisq(nrow(gwas_e0), 1), qchisq(gwas_e0$P, 1, lower.tail = F))
  abline(a=0, b=1, col="red", lty=2)
  gc=qchisq(median(gwas_e0$P), 1, lower.tail = F)/qchisq(0.5, 1, lower.tail = F)

  text(5, qchisq(1/nrow(gwas_e0), 1, lower.tail = F), labels = paste("GC is:", format(gc, digits = 4)))
  #fig2
  hist(gwas_e0$P, xlab=expression(paste(italic(p),"-value")), main=expression(paste(italic(p),"-value distribution")))
  
  #fig 3
  manhattan(gwas_e0, col=c("red", "blue"), bty="l", pch=16, cex=0.5, title =  paste("Manhattan plot phenotype", i))
  
}

  gwas_e0_st=gwas_e0[order(gwas_e0$P),]
  library(knitr)
  kable(gwas_e0_st[1:topN,], caption = paste("Top", topN, "SNPs"))
Top 10 SNPs
CHR SNP BP A1 TEST NMISS BETA STAT P
25432 1 1_21357777 21357777 T ADD 295 -0.3902 -5.251 3.0e-07
25429 1 1_21357173 21357173 G ADD 295 -0.3953 -5.233 3.0e-07
77475 3 3_12453084 12453084 C ADD 293 -0.4428 -5.112 6.0e-07
25431 1 1_21357540 21357540 T ADD 295 -0.3945 -5.023 9.0e-07
25433 1 1_21358774 21358774 A ADD 294 -0.3222 -4.958 1.2e-06
155135 5 5_26004462 26004462 A ADD 294 -0.5748 -4.900 1.6e-06
103086 4 4_8759763 8759763 G ADD 295 -0.5815 -4.829 2.2e-06
64831 3 3_3711360 3711360 T ADD 294 -0.5095 -4.648 5.1e-06
58034 2 2_18345308 18345308 G ADD 293 -0.3538 -4.547 8.0e-06
136118 5 5_13877588 13877588 A ADD 295 -0.3845 -4.517 9.1e-06

6 Run GWAS [Trait 2]

for(i in 2:2)
{
  out0=paste0("arab.pheno-", i)
  gwas=paste(plink, " --linear --bfile ", FN,  " --pheno ", pheF, " --mpheno ", i, " --out ", out0)
  system(gwas)
  gwas_e0=read.table(paste0(out0, ".assoc.linear"), as.is = T, header = T)

  layout(matrix(c(1,2,3,3), 2, 2, byrow = F))
  #fig1
  qqplot(main=paste("QQ plot for phenotype", i), pch=16, cex=0.5, bty="l", xlab=expression(paste("Theoretical ", chi[1]^2)), ylab=expression(paste("Observed ", chi[1]^2)), rchisq(nrow(gwas_e0), 1), qchisq(gwas_e0$P, 1, lower.tail = F))
  abline(a=0, b=1, col="red", lty=2)
  gc=qchisq(median(gwas_e0$P), 1, lower.tail = F)/qchisq(0.5, 1, lower.tail = F)

  text(5, qchisq(1/nrow(gwas_e0), 1, lower.tail = F), labels = paste("GC is:", format(gc, digits = 4)))
  #fig2
  hist(gwas_e0$P, xlab=expression(paste(italic(p),"-value")), main=expression(paste(italic(p),"-value distribution")))
  
  #fig 3
  manhattan(gwas_e0, col=c("red", "blue"), bty="l", pch=16, cex=0.5, title = paste("Manhattan plot phenotype", i))

}

  gwas_e0_st=gwas_e0[order(gwas_e0$P),]
  library(knitr)
  kable(gwas_e0_st[1:topN,], caption = paste("Top", topN, "SNPs"))
Top 10 SNPs
CHR SNP BP A1 TEST NMISS BETA STAT P
71650 3 3_8393207 8393207 G ADD 295 -0.5047 -5.403 1.00e-07
78951 3 3_14970190 14970190 A ADD 295 -0.6445 -5.234 3.00e-07
35451 1 1_27566254 27566254 G ADD 295 -0.6288 -4.930 1.40e-06
26160 1 1_21861402 21861402 T ADD 295 -0.4378 -4.553 7.80e-06
31844 1 1_25032889 25032889 G ADD 295 -0.2494 -4.376 1.69e-05
65102 3 3_3956579 3956579 A ADD 295 -0.4206 -4.363 1.78e-05
113646 4 4_15723034 15723034 G ADD 295 -0.4065 -4.347 1.90e-05
139709 5 5_16438756 16438756 C ADD 295 -0.3354 -4.343 1.94e-05
155164 5 5_26031942 26031942 G ADD 295 -0.4575 -4.340 1.97e-05
31843 1 1_25032824 25032824 G ADD 294 -0.2469 -4.323 2.11e-05

End tabset