7. Counting recombination events

We can count the recombination events from the state path data with this function.

CountRecombEvents<-function(parent.path){
  current<-parent.path[1];
  counter<-0;
  for(val in parent.path){
    if(val!=current){
      counter<-counter+1
      current<-val
    }
  }
  return (counter)
}

We will peform a simulation and estimation and count simulated and estimated recombination events.

  start.p<-c(0.25,0.25,0.25,0.25)
  recombination.rate<-0.001
  trans.p<-GetTransitionProb(recombination.rate)
  child1.chromatid.mom<-SimulateChromatid(10000,recombination.rate) 
  child1.chromatid.dad<-SimulateChromatid(10000,recombination.rate)
  obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
  v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
  vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
  simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
  estimated<-rownames(trans.p)[vitrowmax.2]
  sim.results<-sum(simulated==estimated)

The number for true positives from our estimation would be given by.

  sum(sim.results)
## [1] 9988

and the percentage of true positives is 99.88%

And counting recombination events

   CountRecombEvents(simulated)
## [1] 19
   CountRecombEvents(estimated)
## [1] 20
   CountRecombEvents(estimated)-CountRecombEvents(simulated)
## [1] 1
   10000-sim.results
## [1] 12

Another simulation using the same recombination rate

  child1.chromatid.mom<-SimulateChromatid(10000,recombination.rate) 
  child1.chromatid.dad<-SimulateChromatid(10000,recombination.rate)
  obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
  v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
  vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
  simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
  estimated<-rownames(trans.p)[vitrowmax.2]
  sim.results<-sum(simulated==estimated)
  sim.results
## [1] 9972
  CountRecombEvents(simulated)
## [1] 30
  CountRecombEvents(estimated)
## [1] 34
  CountRecombEvents(estimated)-CountRecombEvents(simulated)
## [1] 4
  10000-sim.results
## [1] 28

I appears wrong estimations from our model make us overestimate the number of recombination events. Let’s update our simulator to report the numer of recombination events per simulation in addition to the number of correct estimation of parental contribuitions.

ViterbiSimulator2 <- function(num.markers,num.simulations,start.p,recombination.rate){ 
  sim.results<-c()
  sim.recomb.events<-c()
  est.recomb.events<-c()
  trans.p<-GetTransitionProb(recombination.rate)
  for (i in 1:num.simulations){
    child1.chromatid.mom<-SimulateChromatid(num.markers,recombination.rate) #100 markers
    child1.chromatid.dad<-SimulateChromatid(num.markers,recombination.rate)
    obs<-child1.chromatid.mom$child+child1.chromatid.dad$child
    v.2<-Viterbi3 (obs,start.p, trans.p,child1.chromatid.mom,child1.chromatid.dad)
    vitrowmax.2 <- apply(v.2, 1, function(x) which.max(x))
    simulated<-paste("mom.",child1.chromatid.mom$parent.path,"/","dad.",child1.chromatid.dad$parent.path,sep="")
    estimated<-rownames(trans.p)[vitrowmax.2]
    sim.results[i]<-sum(simulated==estimated)
    sim.recomb.events[i]<-CountRecombEvents(simulated);
    est.recomb.events[i]<-CountRecombEvents(estimated);
  }
  return(list(sim.results=sim.results,sim.recomb.events=sim.recomb.events,est.recomb.events=est.recomb.events))
}  

simul.results<-ViterbiSimulator2(1000,100,c(0.25,0.25,0.25,0.25),0.01)
summary(simul.results)
##                   Length Class  Mode   
## sim.results       100    -none- numeric
## sim.recomb.events 100    -none- numeric
## est.recomb.events 100    -none- numeric
summary(reg1<-lm(simul.results$est.recomb.events~simul.results$sim.recomb.events))
## 
## Call:
## lm(formula = simul.results$est.recomb.events ~ simul.results$sim.recomb.events)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9488 -1.1357 -0.2762  1.0369  5.7149 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.84680    0.89555   0.946    0.347    
## simul.results$sim.recomb.events  1.08408    0.04394  24.673   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.936 on 98 degrees of freedom
## Multiple R-squared:  0.8613, Adjusted R-squared:  0.8599 
## F-statistic: 608.7 on 1 and 98 DF,  p-value: < 2.2e-16
plot(simul.results$est.recomb.events~simul.results$sim.recomb.events,ylab="estimated",xlab="simulated",main="Number of recombination events\n1000 loci, r=0.01")
abline(reg1)

We can see that although we are overestimating recombination events, they are proportional to the simulted number of recombination events. Now let’s test the same but with 10,000 markers and recombination rate = 0.001 (50 simulations).

simul.results<-ViterbiSimulator2(10000,50,c(0.25,0.25,0.25,0.25),0.001)
summary(simul.results)
##                   Length Class  Mode   
## sim.results       50     -none- numeric
## sim.recomb.events 50     -none- numeric
## est.recomb.events 50     -none- numeric
summary(reg1<-lm(simul.results$est.recomb.events~simul.results$sim.recomb.events))
## 
## Call:
## lm(formula = simul.results$est.recomb.events ~ simul.results$sim.recomb.events)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1589 -0.9312 -0.1238  1.1086  4.1405 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      2.10795    0.84717   2.488   0.0164 *  
## simul.results$sim.recomb.events  1.07006    0.03994  26.789   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.598 on 48 degrees of freedom
## Multiple R-squared:  0.9373, Adjusted R-squared:  0.936 
## F-statistic: 717.6 on 1 and 48 DF,  p-value: < 2.2e-16
plot(simul.results$est.recomb.events~simul.results$sim.recomb.events,ylab="estimated",xlab="simulated",main="Number of recombination events\n10,000 loci, r=0.001")
abline(reg1)

Author

Luis Avila (lmavila@gmail.com)