knitr::opts_knit$set(root.dir = '../')

For maize, we simulated data from \(N_e=10,000\) with bottleneck and growth following Beissinger et al. 2016. For Europeans, we follow a slightly modified model of Torres et al. 2017 that excludes the ancestral expansion, and scales ancestral \(N_e=10,000\), but the gene model is as above.

We simulate neutral \(\pi\) and \(\xi_1\) (singletons) shown every 100 generations in each of 24 4Kb windows centered around a 4kb “gene” which has neutral () and deleterious () mutations. We run simulations with and without background selection. To simulate background selection we draaw deleterious mutations from a gamma distribution with some mean s (see below).

For each parameter set (select \(\times\) statistic \(\times\) demography) we then want to test the effect of generation and genetation \(\times\) window on the statistic and do so with a simple ANOVA.

Boring Libraries and functions

library(tidyverse)
library(ggjoy)
library(viridis)
library(cowplot)

Read in data from pylibseq.

read_sim<-function(pidata,xidata,neutral=FALSE){
  #read data
  pi<-as.tibble(read.csv(pidata,header=F))
  pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
  xi<-as.tibble(read.csv(xidata,header=F)) 
  xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
  #munge into single tibble of means
  data<-rbind(xi,pi) %>%
    `colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
    gather(window,value,2:26) %>% 
    mutate(gen=gen-100000) %>%
    group_by(gen,stat,window) %>%
    summarize(mean_val=mean(value)) %>%
    mutate(window=(as.numeric(window)-13)*4000*3E-5,mean_val=mean_val)
    data=mutate(data,mean_val=if_else(window==0.00,mean_val*4,mean_val))
  if(neutral==TRUE){
    data=mutate(data,mean_val=if_else(window==0.00,mean_val/4,mean_val))
  }
  return(data)
}

Read in only a single simulation rather than means. For ANOVA later.

read_single_sim<-function(pidata,xidata,neutral=FALSE){
  #read data
  pi<-as.tibble(read.csv(pidata,header=F))
  pi<-mutate(pi,stat=rep("pi",length(pi[,1])))
  xi<-as.tibble(read.csv(xidata,header=F)) 
  xi<-mutate(xi,stat=rep("xi",length(xi[,1])))
  #munge into single tibble of means
  data<-rbind(xi,pi) %>%
    `colnames<-`(c("gen", 1:25, "sim", "stat")) %>%
    gather(window,value,2:26) %>% 
    mutate(gen=gen-100000) %>%
    mutate(window=(as.numeric(window)-13)*4000*3E-5)
    data=mutate(data,value=if_else(window==0.00,value*4,value))
  if(neutral==TRUE){
    data=mutate(data,value=if_else(window==0.00,value/4,value))
  }
  return(data)
}

Merge data with BGS with a neutral sim to standardize.

merge_sel_neutral <- function(sel_sim_data,neut_sim_data){
  merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','stat','window'))
  merged$mean_val <- merged$mean_val.x/merged$mean_val.y
  merged
}

Merge individual BGS sims with paired neutral sim.

merge_single_sel_neutral <- function(sel_sim_data,neut_sim_data){
  merged <- merge(sel_sim_data,neut_sim_data,by=c('gen','sim','stat','window'))
  merged$value <- merged$value.x/merged$value.y
  merged
}

ANOVA on sim results.

sim_anova <- function(somedata,stats){
  sep_parm <- filter(somedata,stat==stats,!is.na(value),value != Inf)
  sep_parm$gen <- as.factor(sep_parm$gen)
  sep_parm$window <- as.factor(sep_parm$window)
  fit <- lm(value ~ gen + window + gen*window, data=sep_parm,na.action = na.exclude)

  print(anova(fit))
}

Plot data

simplot<-function(somedata,mystat,label,bneck){
  ggplot(filter(somedata,gen %% 100 <1,stat==mystat,gen>bneck), aes(x=window, y=mean_val, group = gen,color=gen)) + 
  scale_color_viridis(name='Generation') +   
  background_grid(major = "xy", minor = "none") +
  geom_line() + 
  ylab(label)  + xlab("cM") +
  scale_x_continuous(breaks = seq(-1.5,1.5,0.5))
}

Neutral Sims

Maize

maize_neutral <-read_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
## Warning: package 'bindrcpp' was built under R version 3.3.2
maize_neutral_anova <-read_single_sim("sims/results/sim_neutralPi_maize_neutral2.csv","sims/results/sim_singleton_maize_neutral2.csv",neutral=TRUE)
simplot(maize_neutral,"pi",expression(bar(pi)),2200)

European

European_neutral <-read_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
European_neutral_anova <-read_single_sim("sims/results/sim_neutralPi_European_neutral2.csv","sims/results/sim_singleton_European_neutral2.csv",neutral=TRUE)
simplot(European_neutral,"pi",expression(bar(pi)),2200)

Weak selection

Set mean to \(s=5\times 10^{-5}\)

Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-5.csv","sims/results/sim_singleton_maize_s5E-5.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value Pr(>F)    
## gen           32    1.4   0.043   0.5005 0.9917    
## window        24 1918.5  79.938 926.3525 <2e-16 ***
## gen:window   768   11.1   0.014   0.1672 1.0000    
## Residuals  81675 7048.0   0.086                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value Pr(>F)    
## gen           32   2195  68.594 146.0606 <2e-16 ***
## window        24     10   0.407   0.8665 0.6507    
## gen:window   768    338   0.440   0.9372 0.8915    
## Residuals  81519  38283   0.470                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-5.csv","sims/results/sim_singleton_European_s5E-5.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value  Pr(>F)    
## gen           32    6.6   0.205   1.7479 0.00553 ** 
## window        24 1889.0  78.708 670.7534 < 2e-16 ***
## gen:window   768   19.5   0.025   0.2164 1.00000    
## Residuals  81675 9583.9   0.117                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## gen           32   142.5  4.4520 16.4619 < 2.2e-16 ***
## window        24    25.8  1.0749  3.9745 1.846e-10 ***
## gen:window   768   190.9  0.2486  0.9193    0.9454    
## Residuals  81675 22088.4  0.2704                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Intermediate selection

\(s=5\times 10^{-4}\)

Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-4.csv","sims/results/sim_singleton_maize_s5E-4.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32    0.6   0.018    0.2866      1    
## window        24 5592.5 233.021 3722.7011 <2e-16 ***
## gen:window   768    8.1   0.010    0.1675      1    
## Residuals  81675 5112.4   0.063                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq  F value    Pr(>F)    
## gen           32   1874  58.556 138.8111 < 2.2e-16 ***
## window        24     47   1.955   4.6348 3.514e-13 ***
## gen:window   768    382   0.497   1.1781 0.0004822 ***
## Residuals  81519  34388   0.422                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-4.csv","sims/results/sim_singleton_European_s5E-4.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32    2.7   0.084    0.9819 0.4957    
## window        24 5714.5 238.104 2784.8759 <2e-16 ***
## gen:window   768   19.4   0.025    0.2950 1.0000    
## Residuals  81675 6983.1   0.085                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq F value    Pr(>F)    
## gen           32   166.5  5.2037 20.1613 < 2.2e-16 ***
## window        24    32.4  1.3508  5.2337 9.943e-16 ***
## gen:window   768   182.5  0.2377  0.9209    0.9418    
## Residuals  81675 21080.8  0.2581                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Strong selection

\(s=5\times 10^{-3}\) ###Maize

maize<-read_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_anova<-read_single_sim("sims/results/sim_neutralPi_maize_s5E-3.csv","sims/results/sim_singleton_maize_s5E-3.csv")
maize_merged <- merge_sel_neutral(maize,maize_neutral)
maize_merged_anova <- merge_single_sel_neutral(maize_anova,maize_neutral_anova)

\(\pi\)

simplot(maize_merged,"pi",expression(bar(pi)/bar(pi)[neut]),2200)

sim_anova(maize_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32   17.0   0.532   13.7584 <2e-16 ***
## window        24 4982.3 207.596 5373.4057 <2e-16 ***
## gen:window   768    4.9   0.006    0.1667      1    
## Residuals  81675 3155.4   0.039                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(maize_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),2200)

sim_anova(maize_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq  F value    Pr(>F)    
## gen           32  1638.4  51.201 143.9204 < 2.2e-16 ***
## window        24    99.2   4.132  11.6142 < 2.2e-16 ***
## gen:window   768   316.5   0.412   1.1586  0.001549 ** 
## Residuals  81519 29000.9   0.356                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Human

European<-read_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged <- merge_sel_neutral(European,European_neutral)
European_anova<-read_single_sim("sims/results/sim_neutralPi_European_s5E-3.csv","sims/results/sim_singleton_European_s5E-3.csv")
European_merged_anova <- merge_single_sel_neutral(European_anova,European_neutral_anova)

\(\pi\)

simplot(European_merged,"pi",expression(bar(pi)/bar(pi)[neut]),100)

sim_anova(European_merged_anova,'pi')
## Analysis of Variance Table
## 
## Response: value
##               Df Sum Sq Mean Sq   F value Pr(>F)    
## gen           32   18.2   0.570   11.2850 <2e-16 ***
## window        24 5062.0 210.915 4175.2597 <2e-16 ***
## gen:window   768   10.2   0.013    0.2632      1    
## Residuals  81675 4125.9   0.051                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

\(\xi_1\)

simplot(European_merged,"xi",expression(bar(xi[i])/bar(xi[i[neut]])),100)

sim_anova(European_merged_anova,'xi')
## Analysis of Variance Table
## 
## Response: value
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## gen           32   212.2  6.6304 29.5900 <2e-16 ***
## window        24    50.9  2.1205  9.4634 <2e-16 ***
## gen:window   768   162.3  0.2113  0.9432 0.8672    
## Residuals  81675 18301.3  0.2241                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1