library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
params
n=20 #sample size
rho=2 #deletion rate
theta_2=1 #mutation rate
sum2<-function(s,n,rho)
{
sum(sapply(0:(n-s-1), function(j) choose(n-j-2,s-1)*(j+1)/(j+1+rho)))
}
sum1<-function(k,rho,n){
sum(sapply(1:(k-1), function(s) (1/s) * choose(n-1,s)^-1 * sum2(s,n,rho)))
}
sum3<-function(k,rho,n){
sum(sapply(1:(k-1), function(s) (k-s) * choose(n-1,s)^-1 * sum2(s,n,rho)))
}
# More from Tajima
a1=sum(sapply(1:(n-1), function(i) 1/i ))
a2=sum(sapply(1:(n-1), function(i) 1/i^2 ))
b1=(n+1)/(3*(n-1))
b2=2*(n^2+n+3)/(9*n*(n-1))
c1=b1-1/a1
c2=b2-(n+2)/(a1*n)+a2/a1^2
e1=c1/a1
e2=c2/(a1^2+a2)
thetaW=sapply( 2:(n-1), function(k) k/n * theta_2*sum1(k,rho,n) / sum(sapply(1:(k-1),function(s) 1/s)))
S=thetaW*a1
thetaPI=sapply(2:(n-1), function(k) theta_2*(2/(k*(k-1)))*(k/n) * sum3(k,rho,n))
tajd=(thetaPI-thetaW)/(e1*S+e2*S*(S-1))^.5
#data frame
ks<-2:(n-1)
divdata<-data.frame(rep(n-ks,2),c(thetaW,thetaPI),paste(c(rep("W",length(thetaW)),rep("pi",length(thetaPI)))))
colnames(divdata)=c("del_freq","theta","type")
tajddata<-data.frame(n-ks,tajd)
colnames(tajddata)=c("del_freq","TajD")
#plots
div_plot<- ggplot(divdata,aes(x=del_freq,y=theta,color=type))+
geom_point()+
xlab("Deletion Frequency")+
ylab("Diversity")+
scale_color_discrete(name = "Statistic", labels = c(expression(theta[pi]),expression(theta[w])))+theme_bw()+
theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))+
theme(legend.position = c(0.8, 0.85))
tajd_plot<- ggplot(tajddata,aes(x=del_freq,y=tajd))+
geom_point()+theme_bw()+
xlab("Deletion Frequency")+
ylab("Tajima's D")+
theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))
plot_grid(div_plot,tajd_plot,nrow=1)
Segreagting deletion and recessive phenotype
pa=0.05
pd=0:50/100
pA=1-pa-pd
rec=pa^2+2*pa*pd
norm_rec=(pa/(pa+pA))^2
data.frame(rec/norm_rec,pd) %>%
ggplot(aes(y=rec.norm_rec,x=pd))+
theme_bw()+
geom_point()+
ylab("Relative Abundance\n of Recessive Phenotype")+
xlab("Frequency of Segregating Deletion")+
theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))