Lock and load
library(cowplot)
library(dplyr)
library(data.table)
library(tidyr)
library(rtracklayer)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
teo<-fread("~/Desktop/ecl243_teo.thetas.graph.me") %>% mutate(taxa="teo") %>% select(3:15)
mz<-fread("~/Desktop/ecl243.thetas.graph.me") %>% mutate(taxa="mz") %>% select(3:15)
dat<-rbind(teo,mz) %>% mutate(pi=tP/nSites,Mb=WinCenter/1E6)
Plot maize nucleotide diversity
filter(dat,taxa=="mz") %>%
ggplot(aes(y=pi,x=Mb))+
geom_point(color=cbPalette[1])+
geom_smooth(method="loess",span=0.1,color=cbPalette[1])+
ggtitle("maize nucleotide diversity")+
xlab("Mb") +
ylab(expression(hat(theta[pi])))+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"))
Filter for wonky outliers in windwos with few bp
filter(dat,taxa=="mz",nSites>1000) %>%
ggplot(aes(y=pi,x=Mb))+
geom_point(color=cbPalette[1])+
geom_smooth(method="loess",span=0.1,color=cbPalette[1])+
ggtitle("maize nucleotide diversity ( > 1Kb sequence per window)")+
xlab("Mb") +
ylab(expression(hat(theta[pi])))+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"))
Now look at Tajima’s D
filter(dat,taxa=="mz",nSites>1000) %>%
ggplot(aes(y=Tajima,x=Mb))+
geom_point(color=cbPalette[3])+
geom_smooth(method="loess",span=0.1,color=cbPalette[3])+
ggtitle("maize Tajima's D")+
xlab("Mb") +
ylab("Tajima D")+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"))
Add teosinte diversity
filter(dat,nSites>1000) %>%
ggplot(aes(y=pi,x=Mb,color=taxa))+
geom_point()+
geom_smooth(method="loess",span=0.1)+
ggtitle("compared nucleotide diversity")+
xlab("Mb") +
ylab(expression(hat(theta[pi])))+
scale_color_manual(values=cbPalette)+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"),legend.position=c(0.8, 0.9))
And D
filter(dat,nSites>1000) %>%
ggplot(aes(y=Tajima,x=Mb,color=taxa))+
geom_point()+
geom_smooth(method="loess",span=0.1)+
ggtitle("comparison Tajima's D")+
xlab("Mb") +
ylab("Tajima D")+
scale_color_manual(values=cbPalette[3:4])+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"),legend.position=c(0.8, 0.9))
Relationships: Calculate \(\hat\theta_W\) and comparisons
#pi vs watterson
filter(dat,nSites>1000,taxa=="mz") %>%
mutate(watterson=tW/nSites) %>%
ggplot(aes(y=watterson,x=pi))+
geom_point()+
ggtitle("maize nucleotide diversity")+
ylab(expression(hat(theta[W])))+scale_y_log10()+
xlab(expression(hat(theta[pi])))+scale_x_log10()+
geom_smooth(method="lm")+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"),legend.position=c(0.8, 0.9))
Identify potential “sweeps”
#do some machinations, calculate ratio of pi values
merge(mz,teo,by="WinCenter",suffixes=c(".mz",".teo")) %>%
mutate(rat=(tP.mz/nSites.mz)/(tP.teo/nSites.teo),Mb=WinCenter/1E6) %>%
filter(nSites.mz>1000,nSites.teo>1000) %>%
ggplot(aes(y=rat,x=Mb))+
geom_point()+
ggtitle("here be sweeps?")+
ylab(expression(paste(theta[pi],"maize / ",theta[pi],"teosinte")))+
xlab("Mb")+
geom_smooth(method="loess",span=0.05)+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"),legend.position=c(0.8, 0.9))
Get a gff
chr10_genes<-import.gff(con="~/Desktop/Zea_mays.AGPv3.30.chromosome.10.gff3",format="gff3") %>%
as("data.frame") %>%
mutate(start=start/1E6,end=end/1E6) %>%
filter(start<25,start>15,type=="gene")
chr10_exons<-import.gff(con="~/Desktop/Zea_mays.AGPv3.30.chromosome.10.gff3",format="gff3") %>%
as("data.frame") %>%
mutate(start=start/1E6,end=end/1E6) %>%
filter(start<25,start>15,type=="exon")
Plot with genes, zoomed in to our region
merge(mz,teo,by="WinCenter",suffixes=c(".mz",".teo")) %>%
mutate(rat=(tP.mz/nSites.mz)/(tP.teo/nSites.teo),Mb=WinCenter/1E6) %>%
filter(nSites.mz>1000,nSites.teo>1000,Mb>17.5,Mb<18) %>%
ggplot(aes(y=rat,x=Mb))+
geom_point()+
ggtitle("here be sweeps?")+
ylab(expression(paste(theta[pi],"maize / ",theta[pi],"teosinte")))+
xlab("Mb")+
scale_y_log10()+xlim(c(17.5,18))+
geom_segment(aes(x = start, y = 0.08, xend = end, yend = 0.08),data=chr10_genes,colour=rgb(0,0,0,0.25),size=5)+
geom_segment(aes(x = start, y = 0.08, xend = end, yend = 0.08),data=chr10_exons,colour=rgb(0,0,0,1),size=10)+
theme(axis.text=element_text(size=14), plot.title=element_text(size=18),axis.title=element_text(size=18,face="bold"),legend.position=c(0.8, 0.9))
Which genes?
candidates<-merge(mz,teo,by="WinCenter",suffixes=c(".mz",".teo")) %>%
mutate(rat=(tP.mz/nSites.mz)/(tP.teo/nSites.teo),Mb=WinCenter/1E6) %>%
filter(nSites.mz>1000,nSites.teo>1000,rat<quantile(rat,0.05))
Let’s look at one:
mygene=filter(chr10_genes,start<17.8,start>17.7)$gene_id
print(paste('http://www.maizegdb.org/gene_center/gene/', mygene,sep=""))
## [1] "http://www.maizegdb.org/gene_center/gene/GRMZM2G137510"