We can count the recombination events from the state path data with this function.
CountRecombEvents<-function(parent.path){
current<-parent.path[1];
counter<-0;
for(val in parent.path){
if(val!=current){
counter<-counter+1
current<-val
}
}
return (counter)
}
We will peform a simulation and estimation and count simulated and estimated recombination events.
start.p<-c(0.25,0.25,0.25,0.25)
recombination.rate<-0.001
trans.p<-GetTransitionProb(recombination.rate)
child1.chromatid.mom<-SimulateChromatid(10000,recombination.rate)
child1.chromatid.dad<-SimulateChromatid(10000,recombination.rate)
obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
estimated<-rownames(trans.p)[vitrowmax.2]
sim.results<-sum(simulated==estimated)
The number for true positives from our estimation would be given by.
sum(sim.results)
## [1] 9988
and the percentage of true positives is 99.88%
And counting recombination events
CountRecombEvents(simulated)
## [1] 19
CountRecombEvents(estimated)
## [1] 20
CountRecombEvents(estimated)-CountRecombEvents(simulated)
## [1] 1
10000-sim.results
## [1] 12
Another simulation using the same recombination rate
child1.chromatid.mom<-SimulateChromatid(10000,recombination.rate)
child1.chromatid.dad<-SimulateChromatid(10000,recombination.rate)
obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
estimated<-rownames(trans.p)[vitrowmax.2]
sim.results<-sum(simulated==estimated)
sim.results
## [1] 9972
CountRecombEvents(simulated)
## [1] 30
CountRecombEvents(estimated)
## [1] 34
CountRecombEvents(estimated)-CountRecombEvents(simulated)
## [1] 4
10000-sim.results
## [1] 28
I appears wrong estimations from our model make us overestimate the number of recombination events. Let’s update our simulator to report the numer of recombination events per simulation in addition to the number of correct estimation of parental contribuitions.
ViterbiSimulator2 <- function(num.markers,num.simulations,start.p,recombination.rate){
sim.results<-c()
sim.recomb.events<-c()
est.recomb.events<-c()
trans.p<-GetTransitionProb(recombination.rate)
for (i in 1:num.simulations){
child1.chromatid.mom<-SimulateChromatid(num.markers,recombination.rate) #100 markers
child1.chromatid.dad<-SimulateChromatid(num.markers,recombination.rate)
obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
estimated<-rownames(trans.p)[vitrowmax.2]
sim.results[i]<-sum(simulated==estimated)
sim.recomb.events[i]<-CountRecombEvents(simulated);
est.recomb.events[i]<-CountRecombEvents(estimated);
}
return(list(sim.results=sim.results,sim.recomb.events=sim.recomb.events,est.recomb.events=est.recomb.events))
}
simul.results<-ViterbiSimulator2(1000,100,c(0.25,0.25,0.25,0.25),0.01)
summary(simul.results)
## Length Class Mode
## sim.results 100 -none- numeric
## sim.recomb.events 100 -none- numeric
## est.recomb.events 100 -none- numeric
summary(reg1<-lm(simul.results$est.recomb.events~simul.results$sim.recomb.events))
##
## Call:
## lm(formula = simul.results$est.recomb.events ~ simul.results$sim.recomb.events)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9488 -1.1357 -0.2762 1.0369 5.7149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.84680 0.89555 0.946 0.347
## simul.results$sim.recomb.events 1.08408 0.04394 24.673 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.936 on 98 degrees of freedom
## Multiple R-squared: 0.8613, Adjusted R-squared: 0.8599
## F-statistic: 608.7 on 1 and 98 DF, p-value: < 2.2e-16
plot(simul.results$est.recomb.events~simul.results$sim.recomb.events,ylab="estimated",xlab="simulated",main="Number of recombination events\n1000 loci, r=0.01")
abline(reg1)
We can see that although we are overestimating recombination events, they are proportional to the simulted number of recombination events. Now let’s test the same but with 10,000 markers and recombination rate = 0.001 (50 simulations).
simul.results<-ViterbiSimulator2(10000,50,c(0.25,0.25,0.25,0.25),0.001)
summary(simul.results)
## Length Class Mode
## sim.results 50 -none- numeric
## sim.recomb.events 50 -none- numeric
## est.recomb.events 50 -none- numeric
summary(reg1<-lm(simul.results$est.recomb.events~simul.results$sim.recomb.events))
##
## Call:
## lm(formula = simul.results$est.recomb.events ~ simul.results$sim.recomb.events)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1589 -0.9312 -0.1238 1.1086 4.1405
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.10795 0.84717 2.488 0.0164 *
## simul.results$sim.recomb.events 1.07006 0.03994 26.789 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.598 on 48 degrees of freedom
## Multiple R-squared: 0.9373, Adjusted R-squared: 0.936
## F-statistic: 717.6 on 1 and 48 DF, p-value: < 2.2e-16
plot(simul.results$est.recomb.events~simul.results$sim.recomb.events,ylab="estimated",xlab="simulated",main="Number of recombination events\n10,000 loci, r=0.001")
abline(reg1)