This is an R Markdown Notebook describing the pipeline where we plot the GWAS results as Manhattan and qq plots.
The full collection of raw GWAS files which were used for this analysis can be found here
library(qqman)
##
## For example usage please run: vignette('qqman')
##
## Citation appreciated but not required:
## Turner, S.D. qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. biorXiv DOI: 10.1101/005165 (2014).
##
library(qvalue)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
setwd("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Mariam's data/Mariam analysis/GWAS_new/")
list.files()
## [1] "20200325 Manhattan.R"
## [2] "20200326 Manhattan_jpeg.R"
## [3] "20200326 Manhattan.R"
## [4] "20200407_GWAS_and_qqplots.nb.html"
## [5] "20200407_GWAS_and_qqplots.Rmd"
## [6] "Figure_FvFm_Locus.pdf"
## [7] "Figure_QYmac_Locus.pdf"
## [8] "FvFm_common_manhattan.tiff"
## [9] "FvFm_common_qq.tiff"
## [10] "FvFm_control_manhattan.tiff"
## [11] "FvFm_control_qqplot.tiff"
## [12] "FvFm_full_manhattan.tiff"
## [13] "FvFm_full_qqplot.tiff"
## [14] "FvFm_L4_3_S_gwas2029.rda"
## [15] "FvFm_Lss4_3_C_FvFm_Lss4_3_S_mtmm_final.rda"
## [16] "FvFm_Lss4_3_C_gwas2029.rda"
## [17] "FvFm_salt_manhattan.tiff"
## [18] "FvFm_salt_qqplot.tiff"
## [19] "FvFm_spec_manhattan.tiff"
## [20] "FvFm_spec_qqplot.tiff"
## [21] "GR1_Control_manhattan.tiff"
## [22] "GR1_Control_qqplot.tiff"
## [23] "GR1_Salt_manhattan.tiff"
## [24] "GR1_Salt_qqplot.tiff"
## [25] "GR2_Control_manhattan.tiff"
## [26] "GR2_Control_qqplot.tiff"
## [27] "GR2_Salt_manhattan.tiff"
## [28] "GR2_Salt_qqplot.tiff"
## [29] "GRc1_no.o_gwas2029.rda"
## [30] "GRc2_no.o_gwas2029.rda"
## [31] "GRs1_no.o_gwas2029.rda"
## [32] "GRs2_no.o_gwas2029.rda"
## [33] "QY_control_manhattan.tiff"
## [34] "QY_control_qqplot.tiff"
## [35] "QY_max_5_C_gwas2029.rda"
## [36] "QY_max_5_S_gwas2029.rda"
## [37] "QY_salt_manhattan.tiff"
## [38] "QY_salt_qqplot.tiff"
## [39] "SFigure_FvFm_Locus_qqplots.pdf"
## [40] "SFigure_GR_GWAS.pdf"
## [41] "SFigure_QYmac_Locus.pdf"
## [42] "SIIT_1_no.o_gwas2029.rda"
## [43] "SIIT_2_no.o_gwas2029.rda"
## [44] "STI1_manhattan.tiff"
## [45] "STI1_qqplot.tiff"
## [46] "STI2_manhattan.tiff"
## [47] "STI2_qqplot.tiff"
## [48] "Thumbs.db"
First - we need to load the rda file - which can be accessed as “result” object
load("FvFm_Lss4_3_C_gwas2029.rda")
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
Then - we would like to only visualize the SNPs with Minor Allele Frequency above 5%, and thus calculate the Bonferroni threshold (b) based on SNP number > MAF 0.05
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
Finally - we can plot the Manhattan plot as :
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Fv'/Fm' Control at 3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
And the qq plot as:
qq(results$Pval, main = "Q-Q plot of GWAS for Fv'/Fm' Control at 3 d.a.s.", cex = 1.5, las = 1)
Because the pdf files are very large and cause a lot of trouble in Illustrator software - we save the output as tiff files:
par(mar=c(1,1,1,1))
tiff("FvFm_control_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Fv'/Fm' Control at 3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_control_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for Fv'/Fm' Control at 3 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
rm(list = ls())
Subsequently - we follow the same steps for Fv’/Fm’ associations in salt stress conditions:
load("FvFm_L4_3_S_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
par(mar=c(1,1,1,1))
tiff("FvFm_salt_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Fv'/Fm' Salt Stress at 3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_salt_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for Fv'/Fm' Salt Stress at 3 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
rm(list = ls())
For the MTMM model GWAS we also upload the file as per usual, take SNPs with MAF > 0.05 and calculate Bonferroni threshold (b), but the MTMM file has three different p-values
load("FvFm_Lss4_3_C_FvFm_Lss4_3_S_mtmm_final.rda")
head(results)
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
Therefore - we need to produce three separate Manhattan plots for each p-value which was calculated for each model:
manhattan(results, chr="Chr",bp="Pos", p="pval_full", snp="SNP", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Full model", col = c("turquoise3", "purple4"), ylim = c(0, 12), suggestiveline=FALSE, genomewideline=-log10(b))
manhattan(results, chr="Chr",bp="Pos", p="pval_trait_specific", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Trait specific model", snp="SNP", col = c("turquoise3", "purple4"), ylim = c(0, 12), suggestiveline=FALSE, genomewideline=-log10(b))
manhattan(results, chr="Chr",bp="Pos", p="pval_trait_common", snp="SNP", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Trait common model", col = c("turquoise3", "purple4"), ylim = c(0,12), suggestiveline=FALSE, genomewideline=-log10(b))
Same applies for the qq plots:
qq(results$pval_full, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM full model", cex = 1.5, las = 1)
qq(results$pval_trait_specific, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM trait specific model", cex = 1.5, las = 1)
qq(results$pval_trait_common, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM trait common model", cex = 1.5, las = 1)
Save all as tiff files:
par(mar=c(1,1,1,1))
tiff("FvFm_full_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="pval_full", snp="SNP", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Full model", col = c("turquoise3", "purple4"), ylim = c(0, 12), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_full_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$pval_full, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM full model", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_spec_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="pval_trait_specific", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Trait specific model", snp="SNP", col = c("turquoise3", "purple4"), ylim = c(0, 12), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_spec_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$pval_trait_specific, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM trait specific model", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_common_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="pval_trait_common", snp="SNP", main="Fv'/Fm' at 3 d.a.s. Control & Salt Stress - Trait common model", col = c("turquoise3", "purple4"), ylim = c(0,12), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("FvFm_common_qq.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$pval_trait_common, main = "Q-Q plot of GWAS for Fv'/Fm' MTMM trait common model", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
The other locus that we focus on is the QY-max locus, for which we were not able to do MTMM model, as the data did not comply. Instead - we did only the simple one-trait models:
load("QY_max_5_C_gwas2029.rda")
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="QY max Control at 5 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("QY_control_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="QY max Control at 5 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("QY_control_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for QY max Control at 5 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
load("QY_max_5_S_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="QY max Salt Stress at 5 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("QY_salt_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="QY max Salt Stress at 5 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("QY_salt_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for QY max Salt Stress at 5 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
We had high hopes to get some associations with the growth rate - but alas - it did not yield any convincing associations either using single trait associations in the first interval (between 0 and 3 days after stress application):
load("GRc1_no.o_gwas2029.rda")
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Control between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("GR1_Control_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Control between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("GR1_Control_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for Growth Rate Control at 0-3 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
load("GRs1_no.o_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Salt Stress between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("GR1_Salt_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Salt Stress between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("GR1_Salt_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
GR1_S_qq <- qq(results$Pval, main = "Q-Q plot of GWAS for Growth Rate Salt Stress at 0-3 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
load("SIIT_1_no.o_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Salt Tolerance Index based on Growth Rate between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("STI1_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Salt Tolerance Index based on Growth Rate between 0-3 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("STI1_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for STI at 0-3 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
Neither using single trait models in the second interval:
load("GRc2_no.o_gwas2029.rda")
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Control between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("GR2_Control_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Control between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("GR2_Control_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for Growth Rate Control at 4-7 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
load("GRs2_no.o_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Salt Stress between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("GR2_Salt_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Growth Rate Salt Stress between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("GR2_Salt_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for Growth Rate Salt Stress at 4-7 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
load("SIIT_2_no.o_gwas2029.rda")
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
results <- subset(results, results$MAF > 0.05)
a <- nrow(results)
b <- 0.05/a
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Salt Tolerance Index based on Growth Rate between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
par(mar=c(1,1,1,1))
tiff("STI2_manhattan.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main="Salt Tolerance Index based on Growth Rate between 4-7 d.a.s.", ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
par(mar=c(1,1,1,1))
tiff("STI2_qqplot.tiff", res = 300, width = 8, height = 6.4, units = 'in', compression = c( "lzw"))
qq(results$Pval, main = "Q-Q plot of GWAS for STI at 4-7 d.a.s.", cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2