Load stuff

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'tidyr' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave

Get teo and maize data

data<-read.csv("~/Downloads/mz_teo_theta.csv")
summary(data)
##      seqbp         ThetaWteo          ThetaPiteo         ThetaHteo        
##  Min.   : 5000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000e+00  
##  1st Qu.: 6056   1st Qu.:0.003651   1st Qu.:0.003683   1st Qu.:7.730e-06  
##  Median : 7309   Median :0.005822   Median :0.006283   Median :5.825e-04  
##  Mean   : 7394   Mean   :0.006535   Mean   :0.007227   Mean   :1.337e-03  
##  3rd Qu.: 8698   3rd Qu.:0.008666   3rd Qu.:0.009816   3rd Qu.:2.081e-03  
##  Max.   :10000   Max.   :0.030040   Max.   :0.033963   Max.   :2.180e-02  
##                                                                           
##     S_rhoteo         rho_teo          ThetaWMZ          ThetaPiMZ       
##  Min.   :  0.00   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.: 20.00   1st Qu.:0.0003   1st Qu.:0.002945   1st Qu.:0.002373  
##  Median : 44.00   Median :0.0008   Median :0.004700   Median :0.004780  
##  Mean   : 55.24   Mean   :0.0073   Mean   :0.005335   Mean   :0.005803  
##  3rd Qu.: 79.00   3rd Qu.:0.0027   3rd Qu.:0.007076   3rd Qu.:0.008234  
##  Max.   :427.00   Max.   :0.2684   Max.   :0.025585   Max.   :0.031461  
##  NA's   :5        NA's   :1152                                          
##     ThetaHMZ            S_rhoMZ           rho_MZ      
##  Min.   :0.0000000   Min.   :  0.00   Min.   :0.0000  
##  1st Qu.:0.0000023   1st Qu.: 35.00   1st Qu.:0.0001  
##  Median :0.0006446   Median : 80.00   Median :0.0003  
##  Mean   :0.0015826   Mean   : 97.78   Mean   :0.0016  
##  3rd Qu.:0.0024732   3rd Qu.:142.00   3rd Qu.:0.0007  
##  Max.   :0.0222896   Max.   :680.00   Max.   :0.2684  
##                      NA's   :5        NA's   :409
teo_pi=mean(data$ThetaPiteo)
teo_rho=mean(data$rho_teo,na.rm=T)
mz_pi=mean(data$ThetaPiMZ)
mz_rho=mean(data$rho_MZ,na.rm=T)

gdata<-read.csv("~/Downloads/gene_theta.csv")
summary(gdata)
##      seqbp        ThetaWteo          ThetaPiteo         ThetaHteo        
##  Min.   :1823   Min.   :0.002729   Min.   :0.002251   Min.   :0.0002420  
##  1st Qu.:2741   1st Qu.:0.002799   1st Qu.:0.002347   1st Qu.:0.0004377  
##  Median :3658   Median :0.002869   Median :0.002442   Median :0.0006334  
##  Mean   :3658   Mean   :0.002869   Mean   :0.002442   Mean   :0.0006334  
##  3rd Qu.:4576   3rd Qu.:0.002939   3rd Qu.:0.002538   3rd Qu.:0.0008291  
##  Max.   :5494   Max.   :0.003009   Max.   :0.002634   Max.   :0.0010248  
##                                                                          
##     S_rhoteo      rho_teo          ThetaWMZ          ThetaPiMZ        
##  Min.   :1.0   Min.   :0.2684   Min.   :0.001482   Min.   :0.0009675  
##  1st Qu.:1.5   1st Qu.:0.2684   1st Qu.:0.001812   1st Qu.:0.0012352  
##  Median :2.0   Median :0.2684   Median :0.002141   Median :0.0015028  
##  Mean   :2.0   Mean   :0.2684   Mean   :0.002141   Mean   :0.0015028  
##  3rd Qu.:2.5   3rd Qu.:0.2684   3rd Qu.:0.002470   3rd Qu.:0.0017705  
##  Max.   :3.0   Max.   :0.2684   Max.   :0.002799   Max.   :0.0020381  
##                NA's   :1                                              
##     ThetaHMZ            S_rhoMZ       rho_MZ         
##  Min.   :0.000e+00   Min.   : 4   Min.   :0.0000650  
##  1st Qu.:4.025e-06   1st Qu.: 8   1st Qu.:0.0003932  
##  Median :8.050e-06   Median :12   Median :0.0007215  
##  Mean   :8.050e-06   Mean   :12   Mean   :0.0007215  
##  3rd Qu.:1.208e-05   3rd Qu.:16   3rd Qu.:0.0010497  
##  Max.   :1.610e-05   Max.   :20   Max.   :0.0013780  
## 
gteo_pi=mean(gdata$ThetaPiteo)
gteo_rho=mean(gdata$rho_teo,na.rm=T)
gmz_pi=mean(gdata$ThetaPiMZ[2])
gmz_rho=mean(gdata$rho_MZ[2],na.rm=T)
gdata
##   seqbp   ThetaWteo  ThetaPiteo   ThetaHteo S_rhoteo  rho_teo    ThetaWMZ
## 1  5494 0.003008846 0.002633691 0.000242022        3 0.268435 0.002798762
## 2  1823 0.002728634 0.002250982 0.001024756        1       NA 0.001482485
##     ThetaPiMZ ThetaHMZ S_rhoMZ   rho_MZ
## 1 0.002038096 1.61e-05      20 0.000065
## 2 0.000967526 0.00e+00       4 0.001378
ggplot(data,aes(ThetaPiteo-ThetaPiMZ))+
  geom_histogram(fill="green",alpha=0.5) +
  geom_vline(xintercept=gteo_pi-gmz_pi,color="green",linetype=2,size=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# for 7317bp window like gene, our values are:
ne<-teo_pi/(4*3E-8)
sim_theta=teo_pi*7317
sim_rho=teo_rho/teo_pi*sim_theta
div_time=15500/(4*ne)
migration=1.1E-5*ne*4

#calculate growth rate
#Nt=N0e^-{rt}
#0.05*123000=2.98*123000*exp(-r*div_time)
r=log(0.05/2.98)/div_time

sim_command=paste("~/src/msdir/ms 36 100  -t ",sim_theta, "-r ", sim_rho, "7317 -I 2 13 23", migration, " -n 2 2.98 -ej ", div_time, "2 1 -g 2",r ,"-eG", div_time, "0  | ~/src/msstats/src/msstats -I 2 13 23 | cut -f 1-3,6-8 " )
print(sim_command)
## [1] "~/src/msdir/ms 36 100  -t  52.8824192522769 -r  53.657425083146 7317 -I 2 13 23 2.65002328720808  -n 2 2.98 -ej  0.064339057253957 2 1 -g 2 -63.5330349640755 -eG 0.064339057253957 0  | ~/src/msstats/src/msstats -I 2 13 23 | cut -f 1-3,6-8 "
sim<-read.table(text=system(sim_command,intern=TRUE),header=T)
summary(sim)
##       rep             pop            S             theta      
##  Min.   : 0.00   Min.   :0.0   Min.   :102.0   Min.   :32.87  
##  1st Qu.:24.75   1st Qu.:0.0   1st Qu.:165.0   1st Qu.:53.17  
##  Median :49.50   Median :0.5   Median :218.0   Median :64.03  
##  Mean   :49.50   Mean   :0.5   Mean   :215.1   Mean   :62.63  
##  3rd Qu.:74.25   3rd Qu.:1.0   3rd Qu.:260.2   3rd Qu.:70.72  
##  Max.   :99.00   Max.   :1.0   Max.   :320.0   Max.   :86.70  
##        pi            thetaH     
##  Min.   :30.54   Min.   :24.33  
##  1st Qu.:48.54   1st Qu.:42.90  
##  Median :55.32   Median :52.56  
##  Mean   :55.87   Mean   :52.55  
##  3rd Qu.:63.97   3rd Qu.:59.91  
##  Max.   :81.69   Max.   :89.47

Compare

sim_teo=filter(sim,pop==0)
sim_mz=filter(sim,pop==1)
ggplot(sim,aes(pi/7317))+
  geom_histogram(data=subset(sim,pop==0),fill="green",alpha=0.5)+
  geom_histogram(data=subset(sim,pop==1),fill="purple",alpha=0.5)+
  geom_vline(xintercept=teo_pi,color="green",linetype=2)+
  geom_vline(xintercept=mz_pi,color="purple",linetype=2)  +
  geom_vline(xintercept=gmz_pi,color="purple",size=2)  +
  geom_vline(xintercept=gteo_pi,color="green",size=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(sim)
##       rep             pop            S             theta      
##  Min.   : 0.00   Min.   :0.0   Min.   :102.0   Min.   :32.87  
##  1st Qu.:24.75   1st Qu.:0.0   1st Qu.:165.0   1st Qu.:53.17  
##  Median :49.50   Median :0.5   Median :218.0   Median :64.03  
##  Mean   :49.50   Mean   :0.5   Mean   :215.1   Mean   :62.63  
##  3rd Qu.:74.25   3rd Qu.:1.0   3rd Qu.:260.2   3rd Qu.:70.72  
##  Max.   :99.00   Max.   :1.0   Max.   :320.0   Max.   :86.70  
##        pi            thetaH     
##  Min.   :30.54   Min.   :24.33  
##  1st Qu.:48.54   1st Qu.:42.90  
##  Median :55.32   Median :52.56  
##  Mean   :55.87   Mean   :52.55  
##  3rd Qu.:63.97   3rd Qu.:59.91  
##  Max.   :81.69   Max.   :89.47
  #geom_vline(xintercept=teo_pi*7317)