library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************

params

n=20 #sample size
rho=2 #deletion rate 
theta_2=1 #mutation rate

functions

sum2<-function(s,n,rho)
{  
  sum(sapply(0:(n-s-1), function(j) choose(n-j-2,s-1)*(j+1)/(j+1+rho)))
}
sum1<-function(k,rho,n){
  sum(sapply(1:(k-1), function(s)  (1/s) * choose(n-1,s)^-1 * sum2(s,n,rho)))
}
sum3<-function(k,rho,n){
  sum(sapply(1:(k-1), function(s)  (k-s) * choose(n-1,s)^-1 * sum2(s,n,rho)))
}

# More from Tajima
a1=sum(sapply(1:(n-1), function(i) 1/i ))
a2=sum(sapply(1:(n-1), function(i) 1/i^2 ))
b1=(n+1)/(3*(n-1))
b2=2*(n^2+n+3)/(9*n*(n-1))
c1=b1-1/a1
c2=b2-(n+2)/(a1*n)+a2/a1^2
e1=c1/a1
e2=c2/(a1^2+a2)

Watterson’s estimator (eq. 12)

thetaW=sapply( 2:(n-1), function(k) k/n * theta_2*sum1(k,rho,n) / sum(sapply(1:(k-1),function(s) 1/s)))

Segsites

S=thetaW*a1

Tajima’s estimator (eq. 13)

thetaPI=sapply(2:(n-1), function(k)  theta_2*(2/(k*(k-1)))*(k/n) * sum3(k,rho,n)) 

TajD

tajd=(thetaPI-thetaW)/(e1*S+e2*S*(S-1))^.5

#data frame

ks<-2:(n-1)
divdata<-data.frame(rep(n-ks,2),c(thetaW,thetaPI),paste(c(rep("W",length(thetaW)),rep("pi",length(thetaPI)))))
colnames(divdata)=c("del_freq","theta","type")
tajddata<-data.frame(n-ks,tajd)
colnames(tajddata)=c("del_freq","TajD")

#plots

div_plot<- ggplot(divdata,aes(x=del_freq,y=theta,color=type))+
  geom_point()+
  xlab("Deletion Frequency")+
  ylab("Diversity")+
  scale_color_discrete(name = "Statistic", labels = c(expression(theta[pi]),expression(theta[w])))+theme_bw()+
  theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))+
  theme(legend.position = c(0.8, 0.85))

tajd_plot<- ggplot(tajddata,aes(x=del_freq,y=tajd))+
  geom_point()+theme_bw()+
  xlab("Deletion Frequency")+
  ylab("Tajima's D")+
  theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))

plot_grid(div_plot,tajd_plot,nrow=1)

Segreagting deletion and recessive phenotype

pa=0.05
pd=0:50/100
pA=1-pa-pd

rec=pa^2+2*pa*pd
norm_rec=(pa/(pa+pA))^2

data.frame(rec/norm_rec,pd) %>% 
  ggplot(aes(y=rec.norm_rec,x=pd))+
  theme_bw()+
  geom_point()+
  ylab("Relative Abundance\n of Recessive Phenotype")+
  xlab("Frequency of Segregating Deletion")+
  theme(legend.text=element_text(size=18),legend.title=element_text(size=20),axis.text=element_text(size=18),axis.title=element_text(size=20))