This notebook describes the analysis of the GWAS data generated using the ASReml script for the UCR miniCore cowpea population screened in 2022 by Hayley Sussman. GWAS was performed by Magda Julkowska.
#install.packages("qqman")
#install.packages("qvalue")
library(qqman)
##
## For example usage please run: vignette('qqman')
##
## Citation appreciated but not required:
## Turner, (2018). qqman: an R package for visualizing GWAS results using Q-Q and manhattan plots. Journal of Open Source Software, 3(25), 731, https://doi.org/10.21105/joss.00731.
##
library(cowplot)
cowpea_GWAS <- list.files(pattern="gwas2029_new.rda")
cowpea_GWAS
## [1] "day13_FvP_over_FmP.Cntrl_gwas2029_new.rda"
## [2] "day13_FvP_over_FmP.Drght_gwas2029_new.rda"
## [3] "day13_leaf_thickness.Cntrl_gwas2029_new.rda"
## [4] "day13_leaf_thickness.Drght_gwas2029_new.rda"
## [5] "day13_Leaf.Temperature.Cntrl_gwas2029_new.rda"
## [6] "day13_Leaf.Temperature.Drght_gwas2029_new.rda"
## [7] "day13_NPQt.Cntrl_gwas2029_new.rda"
## [8] "day13_NPQt.Drght_gwas2029_new.rda"
## [9] "day13_PhiNPQ.Cntrl_gwas2029_new.rda"
## [10] "day13_PhiNPQ.Drght_gwas2029_new.rda"
## [11] "day13_SPAD.Cntrl_gwas2029_new.rda"
## [12] "day13_SPAD.Drght_gwas2029_new.rda"
## [13] "day6_FvP_over_FmP.Cntrl_gwas2029_new.rda"
## [14] "day6_FvP_over_FmP.Drght_gwas2029_new.rda"
## [15] "day6_leaf_thickness.Cntrl_gwas2029_new.rda"
## [16] "day6_leaf_thickness.Drght_gwas2029_new.rda"
## [17] "day6_Leaf.Temperature.Cntrl_gwas2029_new.rda"
## [18] "day6_Leaf.Temperature.Drght_gwas2029_new.rda"
## [19] "day6_NPQt.Cntrl_gwas2029_new.rda"
## [20] "day6_NPQt.Drght_gwas2029_new.rda"
## [21] "day6_PhiNPQ.Cntrl_gwas2029_new.rda"
## [22] "day6_PhiNPQ.Drght_gwas2029_new.rda"
## [23] "day6_SPAD.Cntrl_gwas2029_new.rda"
## [24] "day6_SPAD.Drght_gwas2029_new.rda"
## [25] "DW.Cntrl_gwas2029_new.rda"
## [26] "DW.Drght_gwas2029_new.rda"
## [27] "EVT.med.Cntrl_gwas2029_new.rda"
## [28] "EVT.med.Drght_gwas2029_new.rda"
## [29] "EVT.p.FW.Cntrl_gwas2029_new.rda"
## [30] "EVT.p.FW.Drght_gwas2029_new.rda"
## [31] "EVT.sum.Cntrl_gwas2029_new.rda"
## [32] "EVT.sum.Drght_gwas2029_new.rda"
## [33] "FW.Cntrl_gwas2029_new.rda"
## [34] "FW.Drght_gwas2029_new.rda"
## [35] "GR.Cntrl_gwas2029_new.rda"
## [36] "GR.Drght_gwas2029_new.rda"
## [37] "WC.Cntrl_gwas2029_new.rda"
## [38] "WC.Drght_gwas2029_new.rda"
load(cowpea_GWAS[1])
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
a <- nrow(results)
b <- 0.05/a
head(results)
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
head(results)
title <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[1])
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main=title, ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
pdf(paste("Manhattan.", title, ".pdf"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main=title, ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
## quartz_off_screen
## 2
qq(results$Pval, main = title, cex = 1.5, las = 1)
pdf(paste("QQplot.", title, ".pdf"))
qq(results$Pval, main = title, cex = 1.5, las = 1)
dev.off()
## quartz_off_screen
## 2
Let’s think about how to visualize the effect size that is also included in our GWAS result files:
max_val <- max(results$beta) * 1.2
min_val <- min(results$beta) * 1.2
manhattan(results, chr="Chr",bp="Pos", logp = FALSE, p="beta", snp="SNP", main=title, ylim=c(min_val, max_val), col = c("turquoise3", "purple4"), suggestiveline = F, genomewideline = F, ylab = "beta")
length(cowpea_GWAS)
## [1] 38
for(i in 1:38){
load(cowpea_GWAS[i])
# Prepare table for graphing:
args <- commandArgs(trailingOnly=TRUE)
results$Pval <-as.numeric(as.character(results$Pval))
a <- nrow(results)
b <- 0.05/a
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
head(results)
title <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[i])
pdf(paste("Manhattan.", title, ".pdf"))
manhattan(results, chr="Chr",bp="Pos", p="Pval", snp="SNP", main=title, ylim=c(0,12), col = c("turquoise3", "purple4"), suggestiveline=FALSE, genomewideline=-log10(b))
dev.off()
max_val <- max(results$beta) * 1.2
min_val <- min(results$beta) * 1.2
pdf(paste("Effect.Size.", title, ".pdf"))
manhattan(results, chr="Chr",bp="Pos", logp = FALSE, p="beta", snp="SNP", main=title, ylim=c(min_val, max_val), col = c("turquoise3", "purple4"), suggestiveline = F, genomewideline = F, ylab = "beta")
dev.off()
pdf(paste("QQplot.", title, ".pdf"))
qq(results$Pval, main = title, cex = 1.5, las = 1)
dev.off()
}
Now - let’s have a look at the most significant SNPs that we have identified thus far:
i=3
load(cowpea_GWAS[i])
results$Pval <-as.numeric(as.character(results$Pval))
results$LOD <- -log10(results$Pval)
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
results$trait <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[i])
LOD5 <- subset(results, results$LOD > -log10(b))
LOD5
for(i in 4:38){
load(cowpea_GWAS[i])
results$Pval <-as.numeric(as.character(results$Pval))
results$LOD <- -log10(results$Pval)
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
results$trait <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[i])
temp <- subset(results, results$LOD > -log10(b))
LOD5 <- rbind(LOD5, temp)
}
i
## [1] 38
dim(LOD5)
## [1] 59 13
LOD5
And now - I want to organize this file - so we can actually see where the interesting loci are. For this purpose - we need to order SNPs per Chromosome and Position:
Bonf <- LOD5[order(LOD5$Chr, LOD5$Pos),]
head(Bonf)
Looks gravy!
Cool - since we said we are not going to consider associations represented only by 2 accessions - we will now subset all of our associations for MAC > 2:
Bonf2 <- subset(Bonf, Bonf$MAC > 2)
dim(Bonf2)
## [1] 58 13
OK - now in order to go through the SNPs - we need to calculate the distance between the neighboring SNPs:
Bonf2$dist <- 0
for(i in 2:nrow(Bonf2)){
Bonf2$dist[i] <- Bonf2$Pos[i] - Bonf2$Pos[i-1]
}
Bonf2
write.csv(Bonf2, "Cowpea_Bonferroni_loci_2022.csv", row.names = FALSE)
So - I added some information to the table above - mainly if its of interest to us at this moment and how individual SNPs cluster into loci. Table look like this at the moment:
MMJ_Bonf <- read.csv("Cowpea_Bonferroni_loci_2022_MMJ.csv")
MMJ_Bonf
MMJ_Bonf <- subset(MMJ_Bonf, MMJ_Bonf$Of.Interest > 0)
MMJ_Bonf
Let’s generate more permissive dataset with all of the associations - since we are looking at the other SNPs associated with our loci of interest:
i=1
load(cowpea_GWAS[i])
results$Pval <-as.numeric(as.character(results$Pval))
results$LOD <- -log10(results$Pval)
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
results$trait <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[i])
LOD3 <- subset(results, results$LOD > 2)
LOD3
for(i in 2:38){
load(cowpea_GWAS[i])
results$Pval <-as.numeric(as.character(results$Pval))
results$LOD <- -log10(results$Pval)
results$Chr <- substr(results$SNP, 1, 2)
results$Chr <- as.numeric(as.character(results$Chr))
results$trait <- gsub("_gwas2029_new.rda", "", cowpea_GWAS[i])
temp <- subset(results, results$LOD > 2)
LOD3 <- rbind(LOD3, temp)
}
i
## [1] 38
dim(LOD3)
## [1] 5777 13
LOD3
First locus - Chr. 1 pos. 41703755
library(ggplot2)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
unique(MMJ_Bonf$Locus)[1]
## [1] "Locus3"
length(unique(MMJ_Bonf$Locus))
## [1] 12
for(i in 1:12){
want <- subset(MMJ_Bonf, MMJ_Bonf$Locus == unique(MMJ_Bonf$Locus)[i])
want
Chr_i_want <- want$Chr[1]
Pos_min <- min(want$Pos)
Pos_max <- max(want$Pos)
temp <- subset(LOD3, LOD3$Chr == Chr_i_want)
temp <- subset(temp, temp$Pos > Pos_min - 100000)
temp <- subset(temp, temp$Pos < Pos_max + 100000)
temp
plot(ggscatter(temp, x = "Pos", y = "LOD", size = "MAC", color = "trait") + theme(legend.position = "right"))
}
OK - nothing too alarming comes up.
Now - let’s calculate the LD for the most interesting loci, to determine the size of the interval we should be using for candidate gene:
load("Cowpea_for_GAPIT.RData")
head(myGeno)
myGeno2 <- as.data.frame(myGeno)
myGeno2$chrom <- gsub("Vu", "", myGeno2$chrom)
myGeno2$chrom <- as.numeric(myGeno2$chrom)
colnames(myGeno2)[1] <- "SNP"
myGeno2$SNP <- paste(myGeno2$chrom, myGeno2$pos, sep="_")
colnames(myGeno2)[1] <- "rs#"
myGeno2