knitr::opts_knit$set(root.dir = '../')
For maize, we simulated data from \(N_e=10,000\) with bottleneck and growth following Beissinger et al. 2016. For Europeans, we follow a slightly modified model of Torres et al. 2017 that excludes the ancestral expansion, and scales ancestral \(N_e=10,000\), but the gene model is as above.
We simulate neutral \(\pi\) and \(\xi_1\) (singletons) shown every 100 generations in each of 24 4Kb windows centered around a 4kb “gene” which has neutral () and deleterious () mutations. We run simulations with and without background selection. To simulate background selection we draaw deleterious mutations from a gamma distribution with some mean s (see below).
For each parameter set (select \(\times\) statistic \(\times\) demography) we then want to test the effect of generation and genetation \(\times\) window on the statistic and do so with a simple ANOVA.
library(tidyverse)
library(ggjoy)
library(viridis)
library(cowplot)
Read in data from pylibseq.
read_sim<-function(pidata,xidata,neutral=FALSE){
#read data
pi<-as.tibble(read.csv(pidata,header=F))
pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
xi<-as.tibble(read.csv(xidata,header=F))
xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
#munge into single tibble of means
data<-rbind(xi,pi) %>%
`colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
gather(window,value,2:26) %>%
mutate(gen=gen-100000) %>%
group_by(gen,stat,window) %>%
summarize(mean_val=mean(value)) %>%
mutate(window=(as.numeric(window)-13)*4000*3E-5,mean_val=mean_val)
data=mutate(data,mean_val=if_else(window==0.00,mean_val*4,mean_val))
if(neutral==TRUE){
data=mutate(data,mean_val=if_else(window==0.00,mean_val/4,mean_val))
}
return(data)
}
Read in only a single simulation rather than means. For ANOVA later.
read_single_sim<-function(pidata,xidata,neutral=FALSE){
#read data
pi<-as.tibble(read.csv(pidata,header=F))
pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
xi<-as.tibble(read.csv(xidata,header=F))
xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
#munge into single tibble of means
data<-rbind(xi,pi) %>%
`colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
gather(window,value,2:26) %>%
mutate(gen=gen-100000) %>%
mutate(window=(as.numeric(window)-13)*4000*3E-5)
data=mutate(data,value=if_else(window==0.00,value*4,value))
if(neutral==TRUE){
data=mutate(data,value=if_else(window==0.00,value/4,value))
}
return(data)
}
Merge data with BGS with a neutral sim to standardize.
merge_sel_neutral <- function(sel_sim_data,neut_sim_data){
merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','stat','window'))
merged$mean_val <- merged$mean_val.x/merged$mean_val.y
merged
}
Merge individual BGS sims with paired neutral sim.
merge_single_sel_neutral <- function(sel_sim_data,neut_sim_data){
merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','sim','stat','window'))
merged$value <- merged$value.x/merged$value.y
merged
}
ANOVA on sim results.
sim_anova <- function(somedata,stats){
sep_parm <- filter(somedata,stat==stats,!is.na(value),value != Inf)
sep_parm$gen <- as.factor(sep_parm$gen)
sep_parm$window <- as.factor(sep_parm$window)
fit <- lm(value ~ gen + window + gen*window, data=sep_parm,na.action = na.exclude)
print(anova(fit))
}
Plot data
simplot<-function(somedata,mystat,label,bneck){
ggplot(filter(somedata,gen %% 100 <1,stat==mystat,gen>bneck), aes(x=window, y=mean_val, group = gen,color=gen)) +
scale_color_viridis(name='Generation') +
background_grid(major = "xy", minor = "none") +
geom_line() +
ylab(label) + xlab("cM") +
scale_x_continuous(breaks = seq(-1.5,1.5,0.5))
}
Maize
maize_neutral <-read_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
## Warning: package 'bindrcpp' was built under R version 3.3.2
maize_neutral_anova <-read_single_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
simplot(maize_neutral,"pi",expression(bar(pi)),2200)
European
European_neutral <-read_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
European_neutral_anova <-read_single_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
simplot(European_neutral,"pi",expression(bar(pi)),2200)
Set mean to \(s=5\times 10^{-5}\)
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)
\(\pi\)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 1.4 0.043 0.5005 0.9917
## window 24 1918.5 79.938 926.3525 <2e-16 ***
## gen:window 768 11.1 0.014 0.1672 1.0000
## Residuals 81675 7048.0 0.086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 2195 68.594 146.0606 <2e-16 ***
## window 24 10 0.407 0.8665 0.6507
## gen:window 768 338 0.440 0.9372 0.8915
## Residuals 81519 38283 0.470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
European<-read_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)
\(\pi\)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 6.6 0.205 1.7479 0.00553 **
## window 24 1889.0 78.708 670.7534 < 2e-16 ***
## gen:window 768 19.5 0.025 0.2164 1.00000
## Residuals 81675 9583.9 0.117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 142.5 4.4520 16.4619 < 2.2e-16 ***
## window 24 25.8 1.0749 3.9745 1.846e-10 ***
## gen:window 768 190.9 0.2486 0.9193 0.9454
## Residuals 81675 22088.4 0.2704
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(s=5\times 10^{-4}\)
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)
\(\pi\)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 0.6 0.018 0.2866 1
## window 24 5592.5 233.021 3722.7011 <2e-16 ***
## gen:window 768 8.1 0.010 0.1675 1
## Residuals 81675 5112.4 0.063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 1874 58.556 138.8111 < 2.2e-16 ***
## window 24 47 1.955 4.6348 3.514e-13 ***
## gen:window 768 382 0.497 1.1781 0.0004822 ***
## Residuals 81519 34388 0.422
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
European<-read_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)
\(\pi\)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 2.7 0.084 0.9819 0.4957
## window 24 5714.5 238.104 2784.8759 <2e-16 ***
## gen:window 768 19.4 0.025 0.2950 1.0000
## Residuals 81675 6983.1 0.085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 166.5 5.2037 20.1613 < 2.2e-16 ***
## window 24 32.4 1.3508 5.2337 9.943e-16 ***
## gen:window 768 182.5 0.2377 0.9209 0.9418
## Residuals 81675 21080.8 0.2581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(s=5\times 10^{-3}\) ###Maize
maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)
\(\pi\)
simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)
sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 17.0 0.532 13.7584 <2e-16 ***
## window 24 4982.3 207.596 5373.4057 <2e-16 ***
## gen:window 768 4.9 0.006 0.1667 1
## Residuals 81675 3155.4 0.039
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)
sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 1638.4 51.201 143.9204 < 2.2e-16 ***
## window 24 99.2 4.132 11.6142 < 2.2e-16 ***
## gen:window 768 316.5 0.412 1.1586 0.001549 **
## Residuals 81519 29000.9 0.356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
European<-read_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)
\(\pi\)
simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)
sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 18.2 0.570 11.2850 <2e-16 ***
## window 24 5062.0 210.915 4175.2597 <2e-16 ***
## gen:window 768 10.2 0.013 0.2632 1
## Residuals 81675 4125.9 0.051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\(\xi_1\)
simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)
sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## gen 32 212.2 6.6304 29.5900 <2e-16 ***
## window 24 50.9 2.1205 9.4634 <2e-16 ***
## gen:window 768 162.3 0.2113 0.9432 0.8672
## Residuals 81675 18301.3 0.2241
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1