Load stuff
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'tidyr' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
Get teo and maize data
data<-read.csv("~/Downloads/mz_teo_theta.csv")
summary(data)
## seqbp ThetaWteo ThetaPiteo ThetaHteo
## Min. : 5000 Min. :0.000000 Min. :0.000000 Min. :0.000e+00
## 1st Qu.: 6056 1st Qu.:0.003651 1st Qu.:0.003683 1st Qu.:7.730e-06
## Median : 7309 Median :0.005822 Median :0.006283 Median :5.825e-04
## Mean : 7394 Mean :0.006535 Mean :0.007227 Mean :1.337e-03
## 3rd Qu.: 8698 3rd Qu.:0.008666 3rd Qu.:0.009816 3rd Qu.:2.081e-03
## Max. :10000 Max. :0.030040 Max. :0.033963 Max. :2.180e-02
##
## S_rhoteo rho_teo ThetaWMZ ThetaPiMZ
## Min. : 0.00 Min. :0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.: 20.00 1st Qu.:0.0003 1st Qu.:0.002945 1st Qu.:0.002373
## Median : 44.00 Median :0.0008 Median :0.004700 Median :0.004780
## Mean : 55.24 Mean :0.0073 Mean :0.005335 Mean :0.005803
## 3rd Qu.: 79.00 3rd Qu.:0.0027 3rd Qu.:0.007076 3rd Qu.:0.008234
## Max. :427.00 Max. :0.2684 Max. :0.025585 Max. :0.031461
## NA's :5 NA's :1152
## ThetaHMZ S_rhoMZ rho_MZ
## Min. :0.0000000 Min. : 0.00 Min. :0.0000
## 1st Qu.:0.0000023 1st Qu.: 35.00 1st Qu.:0.0001
## Median :0.0006446 Median : 80.00 Median :0.0003
## Mean :0.0015826 Mean : 97.78 Mean :0.0016
## 3rd Qu.:0.0024732 3rd Qu.:142.00 3rd Qu.:0.0007
## Max. :0.0222896 Max. :680.00 Max. :0.2684
## NA's :5 NA's :409
teo_pi=mean(data$ThetaPiteo)
teo_rho=mean(data$rho_teo,na.rm=T)
mz_pi=mean(data$ThetaPiMZ)
mz_rho=mean(data$rho_MZ,na.rm=T)
gdata<-read.csv("~/Downloads/gene_theta.csv")
summary(gdata)
## seqbp ThetaWteo ThetaPiteo ThetaHteo
## Min. :1823 Min. :0.002729 Min. :0.002251 Min. :0.0002420
## 1st Qu.:2741 1st Qu.:0.002799 1st Qu.:0.002347 1st Qu.:0.0004377
## Median :3658 Median :0.002869 Median :0.002442 Median :0.0006334
## Mean :3658 Mean :0.002869 Mean :0.002442 Mean :0.0006334
## 3rd Qu.:4576 3rd Qu.:0.002939 3rd Qu.:0.002538 3rd Qu.:0.0008291
## Max. :5494 Max. :0.003009 Max. :0.002634 Max. :0.0010248
##
## S_rhoteo rho_teo ThetaWMZ ThetaPiMZ
## Min. :1.0 Min. :0.2684 Min. :0.001482 Min. :0.0009675
## 1st Qu.:1.5 1st Qu.:0.2684 1st Qu.:0.001812 1st Qu.:0.0012352
## Median :2.0 Median :0.2684 Median :0.002141 Median :0.0015028
## Mean :2.0 Mean :0.2684 Mean :0.002141 Mean :0.0015028
## 3rd Qu.:2.5 3rd Qu.:0.2684 3rd Qu.:0.002470 3rd Qu.:0.0017705
## Max. :3.0 Max. :0.2684 Max. :0.002799 Max. :0.0020381
## NA's :1
## ThetaHMZ S_rhoMZ rho_MZ
## Min. :0.000e+00 Min. : 4 Min. :0.0000650
## 1st Qu.:4.025e-06 1st Qu.: 8 1st Qu.:0.0003932
## Median :8.050e-06 Median :12 Median :0.0007215
## Mean :8.050e-06 Mean :12 Mean :0.0007215
## 3rd Qu.:1.208e-05 3rd Qu.:16 3rd Qu.:0.0010497
## Max. :1.610e-05 Max. :20 Max. :0.0013780
##
gteo_pi=mean(gdata$ThetaPiteo)
gteo_rho=mean(gdata$rho_teo,na.rm=T)
gmz_pi=mean(gdata$ThetaPiMZ[2])
gmz_rho=mean(gdata$rho_MZ[2],na.rm=T)
gdata
## seqbp ThetaWteo ThetaPiteo ThetaHteo S_rhoteo rho_teo ThetaWMZ
## 1 5494 0.003008846 0.002633691 0.000242022 3 0.268435 0.002798762
## 2 1823 0.002728634 0.002250982 0.001024756 1 NA 0.001482485
## ThetaPiMZ ThetaHMZ S_rhoMZ rho_MZ
## 1 0.002038096 1.61e-05 20 0.000065
## 2 0.000967526 0.00e+00 4 0.001378
ggplot(data,aes(ThetaPiteo-ThetaPiMZ))+
geom_histogram(fill="green",alpha=0.5) +
geom_vline(xintercept=gteo_pi-gmz_pi,color="green",linetype=2,size=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# for 7317bp window like gene, our values are:
ne<-teo_pi/(4*3E-8)
sim_theta=teo_pi*7317
sim_rho=teo_rho/teo_pi*sim_theta
div_time=15500/(4*ne)
migration=1.1E-5*ne*4
#calculate growth rate
#Nt=N0e^-{rt}
#0.05*123000=2.98*123000*exp(-r*div_time)
r=log(0.05/2.98)/div_time
sim_command=paste("~/src/msdir/ms 36 100 -t ",sim_theta, "-r ", sim_rho, "7317 -I 2 13 23", migration, " -n 2 2.98 -ej ", div_time, "2 1 -g 2",r ,"-eG", div_time, "0 | ~/src/msstats/src/msstats -I 2 13 23 | cut -f 1-3,6-8 " )
print(sim_command)
## [1] "~/src/msdir/ms 36 100 -t 52.8824192522769 -r 53.657425083146 7317 -I 2 13 23 2.65002328720808 -n 2 2.98 -ej 0.064339057253957 2 1 -g 2 -63.5330349640755 -eG 0.064339057253957 0 | ~/src/msstats/src/msstats -I 2 13 23 | cut -f 1-3,6-8 "
sim<-read.table(text=system(sim_command,intern=TRUE),header=T)
summary(sim)
## rep pop S theta
## Min. : 0.00 Min. :0.0 Min. :102.0 Min. :32.87
## 1st Qu.:24.75 1st Qu.:0.0 1st Qu.:165.0 1st Qu.:53.17
## Median :49.50 Median :0.5 Median :218.0 Median :64.03
## Mean :49.50 Mean :0.5 Mean :215.1 Mean :62.63
## 3rd Qu.:74.25 3rd Qu.:1.0 3rd Qu.:260.2 3rd Qu.:70.72
## Max. :99.00 Max. :1.0 Max. :320.0 Max. :86.70
## pi thetaH
## Min. :30.54 Min. :24.33
## 1st Qu.:48.54 1st Qu.:42.90
## Median :55.32 Median :52.56
## Mean :55.87 Mean :52.55
## 3rd Qu.:63.97 3rd Qu.:59.91
## Max. :81.69 Max. :89.47
Compare
sim_teo=filter(sim,pop==0)
sim_mz=filter(sim,pop==1)
ggplot(sim,aes(pi/7317))+
geom_histogram(data=subset(sim,pop==0),fill="green",alpha=0.5)+
geom_histogram(data=subset(sim,pop==1),fill="purple",alpha=0.5)+
geom_vline(xintercept=teo_pi,color="green",linetype=2)+
geom_vline(xintercept=mz_pi,color="purple",linetype=2) +
geom_vline(xintercept=gmz_pi,color="purple",size=2) +
geom_vline(xintercept=gteo_pi,color="green",size=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(sim)
## rep pop S theta
## Min. : 0.00 Min. :0.0 Min. :102.0 Min. :32.87
## 1st Qu.:24.75 1st Qu.:0.0 1st Qu.:165.0 1st Qu.:53.17
## Median :49.50 Median :0.5 Median :218.0 Median :64.03
## Mean :49.50 Mean :0.5 Mean :215.1 Mean :62.63
## 3rd Qu.:74.25 3rd Qu.:1.0 3rd Qu.:260.2 3rd Qu.:70.72
## Max. :99.00 Max. :1.0 Max. :320.0 Max. :86.70
## pi thetaH
## Min. :30.54 Min. :24.33
## 1st Qu.:48.54 1st Qu.:42.90
## Median :55.32 Median :52.56
## Mean :55.87 Mean :52.55
## 3rd Qu.:63.97 3rd Qu.:59.91
## Max. :81.69 Max. :89.47
#geom_vline(xintercept=teo_pi*7317)