# Lovell — Aug 8, 2014, 11:43 AM
# Drydown 2014 - candidate gene analysis
# ---- Setup and data loading ----
# NOTE(review): setwd() and rm(list = ls()) tie the script to one machine and
# wipe the caller's workspace. They are kept so the relative file names below
# resolve exactly as before; consider a project-relative path instead.
setwd("~/Desktop/Drydown2014")
rm(list = ls())
library(ggplot2)

# Annotation table with a `gene` column; a gene can occupy several rows
# (presumably one per GO term, judging by the file name -- confirm).
gos <- read.csv("dd2014_GOtermsall.csv", header = TRUE)

# Per-gene CDS divergence tables (nucleotide- and protein-level, per the file
# names). Both are expected to share the key columns used in the merge below.
nucleotide.div <- read.csv(
  "tsu_kas_CDS_nucleotide_divergence.csv",
  header = TRUE
)
protein.div <- read.csv(
  "tsu_kas_CDS_protein_divergence.csv",
  header = TRUE
)

# Keep the first row of each gene, in order of first appearance. This replaces
# an rbind()-in-a-loop, which re-scanned the whole table and copied the result
# once per gene (quadratic in the number of genes). A missing gene id, if any,
# keeps its first row as well.
onegene.go <- gos[!duplicated(gos$gene), ]

# Inner join of the two divergence tables on the shared gene descriptors.
# Remaining columns that exist in both inputs receive .x (nucleotide) and
# .y (protein) suffixes.
outall.div <- merge(
  nucleotide.div,
  protein.div,
  by = c("Gene", "Type", "Start", "End", "Orientation", "Missing")
)
names(outall.div)
#>  [1] "Gene"            "Type"            "Start"
#>  [4] "End"             "Orientation"     "Missing"
#>  [7] "Gene_Model.x"    "Sites.x"         "SNPs"
#> [10] "Divergence.x"    "Gene_Model.y"    "Sites.y"
#> [13] "Substitution"    "Divergence.y"    "Premature_Stops"
# ---- Tidy the merged divergence table ----
# Drop the redundant nucleotide-side gene-model column and rename the suffixed
# merge columns. Columns are addressed by name rather than by position so a
# change in the input column order cannot silently mislabel a variable.
col.renames <- c(
  Sites.x = "bp",
  Divergence.x = "nuc.div",
  Gene_Model.y = "gene.model",
  Sites.y = "daltons",
  Divergence.y = "pro.div"
)
stopifnot(
  all(c("Gene_Model.x", names(col.renames)) %in% names(outall.div))
)
outall.div$Gene_Model.x <- NULL
names(outall.div)[match(names(col.renames), names(outall.div))] <-
  unname(col.renames)

# ---- Attach the per-gene annotation ----
# The first annotation column is renamed to "Gene" to serve as the merge key
# (presumably this is the `gene` column -- confirm against the CSV layout).
names(onegene.go)[1] <- "Gene"
all.div.go <- merge(onegene.go, outall.div, by = "Gene")

# Bin protein divergence into 4 equal-width intervals for colouring.
# NOTE(review): negative pro.div values are treated as missing further down
# in this script; here they still take part in the binning and may stretch
# the intervals -- confirm whether they should be blanked first.
all.div.go$pro.div.fac <- cut(all.div.go$pro.div, breaks = 4)

# Split into GxE genes and all other regions. %in% never yields NA, so rows
# with a missing region no longer turn into all-NA rows in either subset.
is.gxe <- all.div.go$region %in% "gxe"
all.div.go.gxe <- all.div.go[is.gxe, ]
all.div.go.reg <- all.div.go[!is.gxe & !is.na(all.div.go$region), ]

# Non-GxE regions: nucleotide divergence along the chromosome, one panel per
# region, coloured by protein-divergence bin.
ggplot(all.div.go.reg, aes(x = Start, y = nuc.div, col = pro.div.fac)) +
  geom_point() +
  scale_color_discrete() +
  theme_bw() +
  facet_wrap(~region, scales = "free_x") +
  ggtitle("nucleotide and protein diversity in the 3 regions w/o candidates")

# GxE genes: same plot, panelled by the third character of the gene id
# (presumably the chromosome number in AT<chr>G... identifiers -- confirm).
all.div.go.gxe$chr <- substring(all.div.go.gxe$Gene, 3, 3)
ggplot(all.div.go.gxe, aes(x = Start, y = nuc.div, col = pro.div.fac)) +
  geom_point() +
  scale_color_discrete() +
  theme_bw() +
  facet_wrap(~chr) +
  ggtitle("distribution of nucleotide and protein diversity for GxE genes")
# ---- Genome-wide divergence plots ----
# Tag every gene with the third character of its id, used as the facet
# variable below (presumably the chromosome number -- confirm).
outall.div$chr <- substring(outall.div$Gene, 3, 3)

# Unfiltered snapshot of the table, taken before any rows are dropped.
test <- outall.div

# Negative protein-divergence values are blanked, then every row holding an NA
# in any column is discarded.
# NOTE(review): complete.cases() looks at all columns, not only pro.div --
# confirm that dropping rows for NAs in unrelated columns is intended.
is.na(outall.div$pro.div) <- which(outall.div$pro.div < 0)
outall.div <- outall.div[complete.cases(outall.div), , drop = FALSE]

# Nucleotide divergence by start position, one panel per chromosome tag.
ggplot(data = outall.div, mapping = aes(x = Start, y = nuc.div)) +
  geom_point() +
  theme_bw() +
  facet_wrap(~chr) +
  ggtitle("nucleotide divergence between tsu and kas")

# Protein divergence by start position, one panel per chromosome tag.
ggplot(data = outall.div, mapping = aes(x = Start, y = pro.div)) +
  geom_point() +
  theme_bw() +
  facet_wrap(~chr) +
  ggtitle("protein divergence between tsu and kas")

# Nucleotide vs. protein divergence, with the 1:1 line for reference.
ggplot(data = outall.div, mapping = aes(x = pro.div, y = nuc.div)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  theme_bw() +
  ggtitle("correlation of protein and nucleotide diversity")
# ch4.out<-outall.div[outall.div$chr==4 & outall.div$Start<5e+06,]
#
# ggplot(ch4.out, aes(y=nuc.div, x=Start))+
# geom_point()+facet_wrap(~chr)+theme_bw()+
# ggtitle("nucleotide divergence between tsu and kas")
#
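# NOTE(review): this disabled plot maps pro.div, so its title should say
# "protein"; it also uses outall.div rather than ch4.out -- confirm intent.
# ggplot(outall.div, aes(y=pro.div, x=Start))+
# geom_point()+facet_wrap(~chr)+theme_bw()+
# ggtitle("protein divergence between tsu and kas")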