# dd2014_nucgodiv.R
#
# Lovell — Aug 8, 2014, 11:43 AM

# Drydown 2014 - candidate gene analysis
#
# Loads ggplot2 and reads the three input tables used by the rest of the
# script from the project directory.
#
# NOTE(review): setwd() and rm(list = ls()) are kept so the script behaves as
# before, but both are discouraged in scripts: the first ties the script to one
# machine's directory layout and the second wipes the caller's workspace.
setwd("~/Desktop/Drydown2014")
rm(list = ls())

# Attach the plotting package before doing any work so a missing dependency
# fails immediately.
library(ggplot2)

# `header = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
gos <- read.csv("dd2014_GOtermsall.csv", header = TRUE)
nucleotide.div <- read.csv("tsu_kas_CDS_nucleotide_divergence.csv",
                           header = TRUE)
protein.div <- read.csv("tsu_kas_CDS_protein_divergence.csv", header = TRUE)

# Reduce `gos` to one row per gene: the first row in which each value of
# `gos$gene` appears, in order of first appearance.
#
# duplicated() does this in a single vectorised pass. The previous version
# grew the data frame with rbind() inside a loop and rescanned `gos` for every
# gene, which is quadratic in the number of rows.
#
# Note: a row whose gene id is NA is kept as it is (the loop version turned it
# into a row of all-NA values).
onegene.go <- gos[!duplicated(gos$gene), ]

# Combine the nucleotide and protein divergence tables into one data frame,
# matching rows on the six annotation columns the two tables share. Columns
# that exist in both tables but are not join keys receive the default
# ".x" / ".y" suffixes from merge().
outall.div <- merge(
  x = nucleotide.div,
  y = protein.div,
  by = c("Gene", "Type", "Start", "End", "Orientation", "Missing")
)
# Print the merged column names so the renaming further down can be checked.
colnames(outall.div)
#  [1] "Gene"            "Type"            "Start"          
#  [4] "End"             "Orientation"     "Missing"        
#  [7] "Gene_Model.x"    "Sites.x"         "SNPs"           
# [10] "Divergence.x"    "Gene_Model.y"    "Sites.y"        
# [13] "Substitution"    "Divergence.y"    "Premature_Stops"
# Tidy the merged divergence table, attach the per-gene rows from `onegene.go`,
# and split the result into the "gxe" rows and everything else.

# Drop the nucleotide table's copy of the gene-model column. Columns are
# addressed by name rather than by position so that a change in the input
# column order cannot silently drop or relabel the wrong column.
outall.div$Gene_Model.x <- NULL

# Old name -> new name for the suffixed columns produced by merge().
# NOTE(review): "Sites.y" is labelled "daltons" as in the original script;
# confirm that this label matches what the column actually measures.
new.names <- c(
  Sites.x      = "bp",
  Divergence.x = "nuc.div",
  Gene_Model.y = "gene.model",
  Sites.y      = "daltons",
  Divergence.y = "pro.div"
)
col.pos <- match(names(new.names), names(outall.div))
# Fail loudly if an expected column is missing instead of renaming nothing.
stopifnot(!any(is.na(col.pos)))
names(outall.div)[col.pos] <- unname(new.names)

# The first column of `onegene.go` becomes the join key "Gene".
names(onegene.go)[1] <- "Gene"
all.div.go <- merge(onegene.go, outall.div, by = "Gene")

# Bin protein divergence into four equal-width intervals for colouring.
# NOTE(review): later in this script negative pro.div values are treated as
# missing; any such values present here would stretch these bins - confirm.
all.div.go$pro.div.fac <- cut(all.div.go$pro.div, breaks = 4)

# which() discards NA comparisons, so rows with a missing `region` are left
# out of both subsets instead of being returned as rows of all-NA values.
all.div.go.gxe <- all.div.go[which(all.div.go$region == "gxe"), ]
all.div.go.reg <- all.div.go[which(all.div.go$region != "gxe"), ]
# Scatter plot for the non-"gxe" rows: nucleotide divergence against gene
# start position, coloured by the binned protein divergence, with one panel
# per region and an independent x axis in each panel.
ggplot(
  data = all.div.go.reg,
  mapping = aes(x = Start, y = nuc.div, colour = pro.div.fac)
) +
  geom_point() +
  scale_color_discrete() +
  theme_bw() +
  facet_wrap(~region, scales = "free_x") +
  ggtitle("nucleotide and protein diversity in the 3 regions w/o candidates")

# plot of chunk unnamed-chunk-1

# Store the third character of each gene identifier in `chr`
# (presumably the chromosome number - confirm against the gene id format).
all.div.go.gxe[["chr"]] <- substr(all.div.go.gxe$Gene, start = 3, stop = 3)

# Scatter plot for the "gxe" rows: nucleotide divergence against gene start
# position, coloured by the binned protein divergence, one panel per `chr`.
ggplot(
  data = all.div.go.gxe,
  mapping = aes(x = Start, y = nuc.div, colour = pro.div.fac)
) +
  geom_point() +
  scale_color_discrete() +
  theme_bw() +
  facet_wrap(~chr) +
  ggtitle("distribution of nucleotide and protein diversity for GxE genes")

# plot of chunk unnamed-chunk-1

# Store the third character of each gene identifier in `chr`
# (presumably the chromosome number - confirm against the gene id format).
outall.div[["chr"]] <- substr(outall.div$Gene, start = 3, stop = 3)

# Keep a copy of the table as it is before any rows are filtered out.
test <- outall.div

# Negative protein divergence values are treated as missing; every row with a
# missing value in any column is then removed.
outall.div$pro.div <- replace(
  outall.div$pro.div,
  which(outall.div$pro.div < 0),
  NA
)
outall.div <- outall.div[complete.cases(outall.div), ]

# Nucleotide divergence against gene start position, one panel per `chr`.
ggplot(data = outall.div, mapping = aes(x = Start, y = nuc.div)) +
  geom_point() +
  facet_wrap(~chr) +
  theme_bw() +
  ggtitle("nucleotide divergence between tsu and kas")

# plot of chunk unnamed-chunk-1

# Protein divergence against gene start position, one panel per `chr`.
ggplot(data = outall.div, mapping = aes(x = Start, y = pro.div)) +
  geom_point() +
  facet_wrap(~chr) +
  theme_bw() +
  ggtitle("protein divergence between tsu and kas")

# plot of chunk unnamed-chunk-1

# Nucleotide divergence plotted against protein divergence for every gene,
# with the identity line (intercept 0, slope 1) drawn for reference.
ggplot(data = outall.div, mapping = aes(x = pro.div, y = nuc.div)) +
  geom_point() +
  theme_bw() +
  geom_abline(intercept = 0, slope = 1) +
  ggtitle("correlation of protein and nucleotide diversity")

# plot of chunk unnamed-chunk-1

# ch4.out<-outall.div[outall.div$chr==4 & outall.div$Start<5e+06,]
# 
# ggplot(ch4.out, aes(y=nuc.div, x=Start))+
#   geom_point()+facet_wrap(~chr)+theme_bw()+
#   ggtitle("nucleotide divergence between tsu and kas")
# 
# ggplot(outall.div, aes(y=pro.div, x=Start))+
#   geom_point()+facet_wrap(~chr)+theme_bw()+
#   ggtitle("nucleotide divergence between tsu and kas")