First we create the functions we are going to use

#simulate segments, the output are waiting times, topology and observed waiting times
sim.seg <- function(lambda,mu,ct){
  to = NULL
  t = NULL
  rt = rexp(1,lambda)  
  st = rt
  while(st < ct){
    p = pexp(ct-st,rate = mu,lower.tail = F)
    to1 = rbinom(1,1,p)
    to = c(to,to1)
    t = c(t,rt)
    rt = rexp(1,lambda)  
    st = st + rt
  }
  cums = cumsum(t)
  obs = cums[to==1]
  obs = c(obs[1],diff(obs)) # not the branching times, but the waiting times
  return(list(t=t,to=to,obs=obs))
}

sim.seg2p <- function(lambda,mu,ct){
  sm = nhpp.missing(pars=c(lambda,mu),ct=ct,miss=TRUE)
  so = nhpp.missing(pars=c(lambda,mu),ct=ct,miss=FALSE)
  df = data.frame(time=c(sm,so),to=c(rep(0,length(sm)),rep(1,length(so))))
  df = df[order(df$time),]
  return(list(t=c(df$time[1],diff(df$time)),to=df$to,obs=c(so[1],diff(so))))
}


llik = function(pars,s,ct){
  lambda = pars[1]
  mu = pars[2]
  bt = cumsum(s$t)
  to = s$to
  n = length(to)
  if(min(pars)<=0){l = -Inf}
  else{l = n*log(lambda)-lambda*ct-mu*sum((ct-bt)*to)+sum((1-to)*log(1-exp(-mu*(ct-bt))))}
  return(l)
}


mllik.ss = function(pars, ss,ct){
  m = length(ss)
  l = vector(mode = 'numeric',length = m)
  for(i in 1:m){
    s = ss[[i]]
    l[i] = llik(pars=pars,s=s,ct)
  }
  L = -sum(l)
  return(L)
}


#calculate the mle for a set of segments
mle.ss <- function(S,init_par = c(0.5,0.5),ct){
  p = subplex(par = init_par, fn = mllik.ss, ss=S,ct=ct)$par
  return(p)
}




# NHPP
nhpp.missing <- function(pars,ct=15,miss=TRUE){
  lambda0 = pars[1]
  mu0 = pars[2]
  t         <- 0
  s         <- 0
  if(miss) lambda <- function(t) lambda0*(1-exp(-mu0*(ct-t)))
  else lambda <- function(t) lambda0*exp(-mu0*(ct-t))
  Lambda <- function(tupper) integrate(f = lambda, lower = 0, upper = tupper)$value
  Lambda_inv <- function(s){
    v <- seq(0,ct+3, length.out = 1000)
    min(v[Vectorize(Lambda)(v)>=s])
  }
  X <- numeric(0)
  u <- rexp(1)
  s <- u
  while(s <= Lambda(ct)){
    t <- Lambda_inv(s)
    X <- c( X, t)
    u <- rexp(1)
    s <- s + u
  }
return(X)
}

# simulate missing data given the observed segment
rec.seg <- function(obs,lambda,p,ct){
  nhpp = nhpp.missing(c(lambda,p),ct=ct)
  obsc = cumsum(obs)
  df = data.frame(time=c(nhpp,obsc),to=c(rep(0,length(nhpp)),rep(1,length(obsc))))
  df = df[order(df$time),]
  return(list(t=c(df$time[1],diff(df$time)),to=df$to,obs=obs))
}

# monte-carlo sampling of size m
sim.srs <- function(Dt,lambda,p,m,ct){ 
  Rec = vector(mode = 'list',length = m)
  for(j in 1:m){
    rec=rec.seg(Dt,lambda,p,ct)
    Rec[[j]] = rec
  }
  return(Rec)
}

rel.llik <- function(S1,p0,p1,ct){
  m = length(S1)
  f1 = vector(mode='numeric',length = m)
  f2 = vector(mode='numeric',length = m)
  for(i in 1:m){
    s = S1[[i]]
    f1[i] = llik(pars=p1,s = s,ct = ct)
    f2[i] = llik(pars=p0,s = s,ct = ct)
    if(is.na(f1[i])) print(s)
  }
  Delta = -log(sum(f1/f2)/m)
  if(is.na(Delta)) print(paste("f1=",f1,"f2=",f2,"m=",m))
  return(Delta)
}

# mcem

pilot.chan <- function(Dt,ct,epsilon,m1=100){
# pilot study suggested by Chan et. al
  l1 = 20 #must be even (odd?)
  lambda = 1
  p = 0.1
  L = vector(mode="numeric",length = l1)
  P = vector(mode="numeric",length = l1)
  for(i in 1:l1){
    srs = sim.srs(Dt,lambda,p,m1,ct)
    mle = mle.ss(srs,ct = ct)
    L[i] = lambda = mle[1]
    P[i] = p = mle[2]
  }
  l = 10
  L = L[(l1-10):l1]
  P = P[(l1-10):l1]
  Q = vector(mode="numeric",length = (l1-10))
  for(i in 1:(l1-10)){
    Delta = vector(mode="numeric",length = l)
    for(j in 1:l){
      srs = sim.srs(Dt,L[i],P[i],m1,ct)
      mle = mle.ss(srs,ct = ct)
      Delta[j] = rel.llik(S1 = srs,p0 = c(L[i],P[i]),p1 = mle,ct=ct)
    }
    mD = mean(Delta)
    Q[i] = sum((Delta-mD)^2)
  }
  print(Q)
  s2 = sum(Q)/((l-1)*(l1-10+1))
  s1 = sqrt(s2)
  m = m1*s1/epsilon
  m = floor(m) + 1
  return(list(m=m,p=mle,s1=s1))
}


mcem.seg <- function(Dt,ep=0.01,ct,pilot=F,p){
  if(pilot){
    print("calculating proper m, this might take a while")
    pi = pilot.chan(Dt,ct,ep) #remove ep
    m = pi$m
    s1 = pi$s1
  }
  m = p$m
  s1 = p$s1
  sig = 100*s1/m
  tol = 2*sig*sqrt(1/5)
  D = Inf
  k = 1
  print("initializing mcem")
  lambda = L = p$p[1]
  p = P = p$p[2]
  while(abs(D)>tol){
    print(paste("iteration",k,"previous D: ",D, " required tolerance: ", tol))
    srs = sim.srs(Dt,lambda,p,m,ct)
    mle = mle.ss(srs,ct = ct)
    D = rel.llik(S1 = srs,p0 = c(lambda,p),p1 = mle,ct=ct)
    L[k+1] = lambda = mle[1]
    P[k+1] = p = mle[2]
    k = k+1
  }
  return(list(L=L,P=P))
}

First we check if that makes sense comparing simulation methods

m=10
M = matrix(ncol = 2,nrow = m)
for(j in 1:m){
  n = 10
  S = vector(mode='list',length=n)
  ct=15
  for(i in 1:n){
    S[[i]] = sim.seg(0.8,0.05,ct=ct)
  }
  #print(S)
  M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)

##        V1               V2         
##  Min.   :0.7400   Min.   :0.03762  
##  1st Qu.:0.7767   1st Qu.:0.04309  
##  Median :0.8033   Median :0.04672  
##  Mean   :0.8147   Mean   :0.04985  
##  3rd Qu.:0.8400   3rd Qu.:0.05373  
##  Max.   :0.9267   Max.   :0.07560

m=10
M = matrix(ncol = 2,nrow = m)
for(j in 1:m){
  n = 10
  ct=15
  S = vector(mode='list',length=n)
  for(i in 1:n){
    S[[i]] = sim.seg2p(0.8,0.05,ct=ct)
  }
  #print(S)
  M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)

##        V1               V2         
##  Min.   :0.7000   Min.   :0.03376  
##  1st Qu.:0.7883   1st Qu.:0.04498  
##  Median :0.8000   Median :0.04786  
##  Mean   :0.8307   Mean   :0.12869  
##  3rd Qu.:0.8050   3rd Qu.:0.06280  
##  Max.   :1.1667   Max.   :0.83333

comparing

p = proc.time()
n = 1000
d1 = vector(mode='numeric',length=n)
d2 = vector(mode='numeric',length=n)
ct=20
for(i in 1:n){
  d1[i] = length(sim.seg(0.8,0.05,ct=ct)$t)
  d2[i] = length(sim.seg2p(0.8,0.05,ct=ct)$t)
}
print(proc.time()-p)

##    user  system elapsed 
## 485.237   3.354 490.164

summary(d1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   13.00   16.00   16.09   19.00   32.00

summary(d2)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   13.00   16.00   16.16   19.00   32.00

hwitli

p=proc.time()
m=100
M = matrix(ncol = 2,nrow = m)
lambda=0.8
mu=0.1
ct=15
for(j in 1:m){
  n = 10
  ct=15
  S = vector(mode='list',length=n)
  s = sim.seg(lambda,mu,ct)
  for(i in 1:n){
    S[[i]] = rec.seg(s$obs,lambda,mu)
  }
  M[j,] = mle.ss(S = S,ct=ct)
}
print(p-proc.time())

##     user   system  elapsed 
## -186.714   -3.577 -198.272

summary(M)

##        V1               V2         
##  Min.   :0.4200   Min.   :0.06022  
##  1st Qu.:0.6600   1st Qu.:0.08450  
##  Median :0.7933   Median :0.09956  
##  Mean   :0.8009   Mean   :0.11487  
##  3rd Qu.:0.9217   3rd Qu.:0.11974  
##  Max.   :1.2867   Max.   :0.56736

p=proc.time()
m=100
M = matrix(ncol = 2,nrow = m)
lambda=0.8
mu=0.1
ct=15
for(j in 1:m){
  n = 10
  ct=15
  S = vector(mode='list',length=n)
  s = sim.seg2p(lambda,mu,ct)
  for(i in 1:n){
    S[[i]] = rec.seg(s$obs,lambda,mu)
  }
  M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)

##        V1               V2         
##  Min.   :0.4533   Min.   :0.04810  
##  1st Qu.:0.6783   1st Qu.:0.07599  
##  Median :0.7967   Median :0.10527  
##  Mean   :0.8233   Mean   :0.13037  
##  3rd Qu.:0.9533   3rd Qu.:0.13969  
##  Max.   :1.1933   Max.   :0.83333

print(p-proc.time())

##     user   system  elapsed 
## -226.081   -4.184 -240.186

MCEM

seg = sim.seg(0.8,0.1,20)
obs = seg$obs
obs
time = proc.time()
pilot.chan(obs,20,0.0001,m1=10)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=10)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)

time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)

time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)

time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)

time = proc.time()
p=pilot.chan(obs,20,0.0001,m1=30)
mcem.seg(obs,ep=0.0001,ct=20,p=p)
print(proc.time()-time)

Experiment

time = proc.time()
n = 50
L = vector(mode="numeric",length = n)
P = vector(mode="numeric",length = n)
for(i in 1:n){
  seg = sim.seg(0.8,0.1,20)
  obs = seg$obs
  p=pilot.chan(obs,20,0.0001,m1=20)
  mcem = mcem.seg(obs,ep=0.0001,ct=20,p=p)
  print(mcem)
  L[i] = mcem$L[length(mcem$L)]
  P[i] = mcem$P[length(mcem$P)]
}
print(proc.time()-time)
summary(L)
summary(P)