Do we expect a correlation between the mean number of crossovers per chromosome and estimates of interference due simply to issues of power? See original (very cool) paper for details. We simulate (badly) some chromosomes with uniform distribution of crossovers, then apply some constant interference and see how the correlation between estimated interference and crossovers becomes.
library(cowplot)
## Loading required package: ggplot2
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggplot2':
##
## ggsave
Functions – the gamma distribution to calculate probability of distance, function to make some data of distances between chiasmata, do a sim and estimate a likelihood surface
#gamma
dist_fun=function(x,v){(2*v)^v*x^(v-1)*exp(-2*x*v)/gamma(v)}
#make some distance data
make_data=function(r,i){
#add COs to bivalent
chroms=(lapply(1:10, function(x) sort(sample(1:100/100,size=rpois(1,r)))))
#minimum 1 CO per bivalent
for(c in 1:10){ if(length(chroms[[c]])==0){chroms[[c]]=sample(1:100/100,size=1)}}
cos<-sapply(chroms,function(x) length(x))
#get distance between neighboring
distances=unlist(lapply(chroms[which(cos>1)], function(x) diff(x)))
#remove all COs closer than i for interference
return(list(subset(distances,distances>i),sum(cos)))
}
#do a sim, estimate likelihood surface
do_sim<-function(range,mean_co,i){
inter=as.numeric()
mco=as.numeric()
for(r in mean_co){
bivalents=as.numeric()
while(length(bivalents)==0){
mydata=make_data(r,i)
bivalents=mydata[[1]]
}
like_surf=sapply(range, function(V) sum(sapply(bivalents, function(X) log(dist_fun(X,V)))))
inter[length(inter)+1]=range[which(like_surf==max(like_surf))]
mco[length(mco)+1]=mydata[[2]]
}
dataplot=data.frame(inter,mco)
return(dataplot)
}
Do a sim with no interference
range=1:100/10 #grid of `interference` values
mean_co=500:3000/1000 #range of mean COs per chromosome
noint=do_sim(range,mean_co,0)
summary(lm(data=noint,inter~mco))
##
## Call:
## lm(formula = inter ~ mco, data = noint)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4101 -0.9101 -0.3585 0.2765 7.9482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.838464 0.110638 34.69 <2e-16 ***
## mco -0.111667 0.005286 -21.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.794 on 2499 degrees of freedom
## Multiple R-squared: 0.1515, Adjusted R-squared: 0.1512
## F-statistic: 446.2 on 1 and 2499 DF, p-value: < 2.2e-16
Plot
ggplot(noint,aes(y=inter,x=mco))+
geom_point()+
geom_smooth(method="lm")+
xlab("Crossovers per Genome")+
ylab("Estimated Interference")+
ggtitle("No Interference")
Redo sims with interference:
range=1:100/10 #grid of `interference` values
mean_co=500:3000/1000 #range of mean COs per chromosome
int=do_sim(range,mean_co,0.3)
summary(lm(data=int,inter~mco))
##
## Call:
## lm(formula = inter ~ mco, data = int)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9907 -0.1175 0.7137 1.0560 1.3494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.112713 0.108080 75.062 <2e-16 ***
## mco 0.048897 0.005136 9.521 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.697 on 2499 degrees of freedom
## Multiple R-squared: 0.035, Adjusted R-squared: 0.03462
## F-statistic: 90.64 on 1 and 2499 DF, p-value: < 2.2e-16
Plot
ggplot(int,aes(y=inter,x=mco))+
geom_point()+
geom_smooth(method="lm")+
xlab("Crossovers per Genome")+
ylab("Estimated Interference")+
ggtitle("Interference")