Do we expect a correlation between the mean number of crossovers per chromosome and estimates of interference due simply to issues of power? See original (very cool) paper for details. We simulate (badly) some chromosomes with uniform distribution of crossovers, then apply some constant interference and see how the correlation between estimated interference and crossovers becomes.

library(cowplot)
## Loading required package: ggplot2
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:ggplot2':
## 
##     ggsave

Functions – the gamma distribution to calculate probability of distance, function to make some data of distances between chiasmata, do a sim and estimate a likelihood surface

#gamma
dist_fun=function(x,v){(2*v)^v*x^(v-1)*exp(-2*x*v)/gamma(v)}

#make some distance data
make_data=function(r,i){  
  #add COs to bivalent
  chroms=(lapply(1:10, function(x) sort(sample(1:100/100,size=rpois(1,r)))))
  #minimum 1 CO per bivalent
  for(c in 1:10){ if(length(chroms[[c]])==0){chroms[[c]]=sample(1:100/100,size=1)}} 
  cos<-sapply(chroms,function(x) length(x))
  #get distance between neighboring 
  distances=unlist(lapply(chroms[which(cos>1)], function(x) diff(x)))
  #remove all COs closer than i for interference
  return(list(subset(distances,distances>i),sum(cos)))
}

#do a sim, estimate likelihood surface
do_sim<-function(range,mean_co,i){
  inter=as.numeric()
  mco=as.numeric()
  for(r in mean_co){
    bivalents=as.numeric() 
    while(length(bivalents)==0){ 
      mydata=make_data(r,i) 
      bivalents=mydata[[1]] 
    }
    like_surf=sapply(range, function(V) sum(sapply(bivalents, function(X) log(dist_fun(X,V)))))
    inter[length(inter)+1]=range[which(like_surf==max(like_surf))]
    mco[length(mco)+1]=mydata[[2]]
  }
    dataplot=data.frame(inter,mco)
    return(dataplot)
}

Do a sim with no interference

range=1:100/10 #grid of `interference` values
mean_co=500:3000/1000 #range of mean COs per chromosome
noint=do_sim(range,mean_co,0)
summary(lm(data=noint,inter~mco))
## 
## Call:
## lm(formula = inter ~ mco, data = noint)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4101 -0.9101 -0.3585  0.2765  7.9482 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.838464   0.110638   34.69   <2e-16 ***
## mco         -0.111667   0.005286  -21.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.794 on 2499 degrees of freedom
## Multiple R-squared:  0.1515, Adjusted R-squared:  0.1512 
## F-statistic: 446.2 on 1 and 2499 DF,  p-value: < 2.2e-16

Plot

ggplot(noint,aes(y=inter,x=mco))+
  geom_point()+
  geom_smooth(method="lm")+
  xlab("Crossovers per Genome")+
  ylab("Estimated Interference")+
  ggtitle("No Interference")

Redo sims with interference:

range=1:100/10 #grid of `interference` values
mean_co=500:3000/1000 #range of mean COs per chromosome
int=do_sim(range,mean_co,0.3)
summary(lm(data=int,inter~mco))
## 
## Call:
## lm(formula = inter ~ mco, data = int)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9907 -0.1175  0.7137  1.0560  1.3494 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.112713   0.108080  75.062   <2e-16 ***
## mco         0.048897   0.005136   9.521   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.697 on 2499 degrees of freedom
## Multiple R-squared:  0.035,  Adjusted R-squared:  0.03462 
## F-statistic: 90.64 on 1 and 2499 DF,  p-value: < 2.2e-16

Plot

ggplot(int,aes(y=inter,x=mco))+
  geom_point()+
  geom_smooth(method="lm")+
  xlab("Crossovers per Genome")+
  ylab("Estimated Interference")+
  ggtitle("Interference")