First we create the functions we are going to use
#simulate segments, the output are waiting times, topology and observed waiting times
sim.seg <- function(lambda,mu,ct){
to = NULL
t = NULL
rt = rexp(1,lambda)
st = rt
while(st < ct){
p = pexp(ct-st,rate = mu,lower.tail = F)
to1 = rbinom(1,1,p)
to = c(to,to1)
t = c(t,rt)
rt = rexp(1,lambda)
st = st + rt
}
cums = cumsum(t)
obs = cums[to==1]
obs = c(obs[1],diff(obs)) # not the branching times, but the waiting times
return(list(t=t,to=to,obs=obs))
}
sim.seg2p <- function(lambda,mu,ct){
sm = nhpp.missing(pars=c(lambda,mu),ct=ct,miss=TRUE)
so = nhpp.missing(pars=c(lambda,mu),ct=ct,miss=FALSE)
df = data.frame(time=c(sm,so),to=c(rep(0,length(sm)),rep(1,length(so))))
df = df[order(df$time),]
return(list(t=c(df$time[1],diff(df$time)),to=df$to,obs=c(so[1],diff(so))))
}
llik = function(pars,s,ct){
lambda = pars[1]
mu = pars[2]
bt = cumsum(s$t)
to = s$to
n = length(to)
if(min(pars)<=0){l = -Inf}
else{l = n*log(lambda)-lambda*ct-mu*sum((ct-bt)*to)+sum((1-to)*log(1-exp(-mu*(ct-bt))))}
return(l)
}
mllik.ss = function(pars, ss,ct){
m = length(ss)
l = vector(mode = 'numeric',length = m)
for(i in 1:m){
s = ss[[i]]
l[i] = llik(pars=pars,s=s,ct)
}
L = -sum(l)
return(L)
}
#calculate the mle for a set of segments
mle.ss <- function(S,init_par = c(0.5,0.5),ct){
p = subplex(par = init_par, fn = mllik.ss, ss=S,ct=ct)$par
return(p)
}
# NHPP
nhpp.missing <- function(pars,ct=15,miss=TRUE){
lambda0 = pars[1]
mu0 = pars[2]
t <- 0
s <- 0
if(miss) lambda <- function(t) lambda0*(1-exp(-mu0*(ct-t)))
else lambda <- function(t) lambda0*exp(-mu0*(ct-t))
Lambda <- function(tupper) integrate(f = lambda, lower = 0, upper = tupper)$value
Lambda_inv <- function(s){
v <- seq(0,ct+3, length.out = 1000)
min(v[Vectorize(Lambda)(v)>=s])
}
X <- numeric(0)
u <- rexp(1)
s <- u
while(s <= Lambda(ct)){
t <- Lambda_inv(s)
X <- c( X, t)
u <- rexp(1)
s <- s + u
}
return(X)
}
# simulate missing data given the observed segment
rec.seg <- function(obs,lambda,p,ct){
nhpp = nhpp.missing(c(lambda,p),ct=ct)
obsc = cumsum(obs)
df = data.frame(time=c(nhpp,obsc),to=c(rep(0,length(nhpp)),rep(1,length(obsc))))
df = df[order(df$time),]
return(list(t=c(df$time[1],diff(df$time)),to=df$to,obs=obs))
}
# monte-carlo sampling of size m
sim.srs <- function(Dt,lambda,p,m,ct){
Rec = vector(mode = 'list',length = m)
for(j in 1:m){
rec=rec.seg(Dt,lambda,p,ct)
Rec[[j]] = rec
}
return(Rec)
}
rel.llik <- function(S1,p0,p1,ct){
m = length(S1)
f1 = vector(mode='numeric',length = m)
f2 = vector(mode='numeric',length = m)
for(i in 1:m){
s = S1[[i]]
f1[i] = llik(pars=p1,s = s,ct = ct)
f2[i] = llik(pars=p0,s = s,ct = ct)
if(is.na(f1[i])) print(s)
}
Delta = -log(sum(f1/f2)/m)
if(is.na(Delta)) print(paste("f1=",f1,"f2=",f2,"m=",m))
return(Delta)
}
# mcem
pilot.chan <- function(Dt,ct,epsilon,m1=100){
# pilot study suggested by Chan et. al
l1 = 20 #must be even (odd?)
lambda = 1
p = 0.1
L = vector(mode="numeric",length = l1)
P = vector(mode="numeric",length = l1)
for(i in 1:l1){
srs = sim.srs(Dt,lambda,p,m1,ct)
mle = mle.ss(srs,ct = ct)
L[i] = lambda = mle[1]
P[i] = p = mle[2]
}
l = 10
L = L[(l1-10):l1]
P = P[(l1-10):l1]
Q = vector(mode="numeric",length = (l1-10))
for(i in 1:(l1-10)){
Delta = vector(mode="numeric",length = l)
for(j in 1:l){
srs = sim.srs(Dt,L[i],P[i],m1,ct)
mle = mle.ss(srs,ct = ct)
Delta[j] = rel.llik(S1 = srs,p0 = c(L[i],P[i]),p1 = mle,ct=ct)
}
mD = mean(Delta)
Q[i] = sum((Delta-mD)^2)
}
print(Q)
s2 = sum(Q)/((l-1)*(l1-10+1))
s1 = sqrt(s2)
m = m1*s1/epsilon
m = floor(m) + 1
return(list(m=m,p=mle,s1=s1))
}
mcem.seg <- function(Dt,ep=0.01,ct,pilot=F,p){
if(pilot){
print("calculating proper m, this might take a while")
pi = pilot.chan(Dt,ct,ep) #remove ep
m = pi$m
s1 = pi$s1
}
m = p$m
s1 = p$s1
sig = 100*s1/m
tol = 2*sig*sqrt(1/5)
D = Inf
k = 1
print("initializing mcem")
lambda = L = p$p[1]
p = P = p$p[2]
while(abs(D)>tol){
print(paste("iteration",k,"previous D: ",D, " required tolerance: ", tol))
srs = sim.srs(Dt,lambda,p,m,ct)
mle = mle.ss(srs,ct = ct)
D = rel.llik(S1 = srs,p0 = c(lambda,p),p1 = mle,ct=ct)
L[k+1] = lambda = mle[1]
P[k+1] = p = mle[2]
k = k+1
}
return(list(L=L,P=P))
}
First we check if that makes sense comparing simulation methods
m=10
M = matrix(ncol = 2,nrow = m)
for(j in 1:m){
n = 10
S = vector(mode='list',length=n)
ct=15
for(i in 1:n){
S[[i]] = sim.seg(0.8,0.05,ct=ct)
}
#print(S)
M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)
## V1 V2
## Min. :0.7400 Min. :0.03762
## 1st Qu.:0.7767 1st Qu.:0.04309
## Median :0.8033 Median :0.04672
## Mean :0.8147 Mean :0.04985
## 3rd Qu.:0.8400 3rd Qu.:0.05373
## Max. :0.9267 Max. :0.07560
m=10
M = matrix(ncol = 2,nrow = m)
for(j in 1:m){
n = 10
ct=15
S = vector(mode='list',length=n)
for(i in 1:n){
S[[i]] = sim.seg2p(0.8,0.05,ct=ct)
}
#print(S)
M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)
## V1 V2
## Min. :0.7000 Min. :0.03376
## 1st Qu.:0.7883 1st Qu.:0.04498
## Median :0.8000 Median :0.04786
## Mean :0.8307 Mean :0.12869
## 3rd Qu.:0.8050 3rd Qu.:0.06280
## Max. :1.1667 Max. :0.83333
comparing
p = proc.time()
n = 1000
d1 = vector(mode='numeric',length=n)
d2 = vector(mode='numeric',length=n)
ct=20
for(i in 1:n){
d1[i] = length(sim.seg(0.8,0.05,ct=ct)$t)
d2[i] = length(sim.seg2p(0.8,0.05,ct=ct)$t)
}
print(proc.time()-p)
## user system elapsed
## 485.237 3.354 490.164
summary(d1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 13.00 16.00 16.09 19.00 32.00
summary(d2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 13.00 16.00 16.16 19.00 32.00
hwitli
p=proc.time()
m=100
M = matrix(ncol = 2,nrow = m)
lambda=0.8
mu=0.1
ct=15
for(j in 1:m){
n = 10
ct=15
S = vector(mode='list',length=n)
s = sim.seg(lambda,mu,ct)
for(i in 1:n){
S[[i]] = rec.seg(s$obs,lambda,mu)
}
M[j,] = mle.ss(S = S,ct=ct)
}
print(p-proc.time())
## user system elapsed
## -186.714 -3.577 -198.272
summary(M)
## V1 V2
## Min. :0.4200 Min. :0.06022
## 1st Qu.:0.6600 1st Qu.:0.08450
## Median :0.7933 Median :0.09956
## Mean :0.8009 Mean :0.11487
## 3rd Qu.:0.9217 3rd Qu.:0.11974
## Max. :1.2867 Max. :0.56736
p=proc.time()
m=100
M = matrix(ncol = 2,nrow = m)
lambda=0.8
mu=0.1
ct=15
for(j in 1:m){
n = 10
ct=15
S = vector(mode='list',length=n)
s = sim.seg2p(lambda,mu,ct)
for(i in 1:n){
S[[i]] = rec.seg(s$obs,lambda,mu)
}
M[j,] = mle.ss(S = S,ct=ct)
}
summary(M)
## V1 V2
## Min. :0.4533 Min. :0.04810
## 1st Qu.:0.6783 1st Qu.:0.07599
## Median :0.7967 Median :0.10527
## Mean :0.8233 Mean :0.13037
## 3rd Qu.:0.9533 3rd Qu.:0.13969
## Max. :1.1933 Max. :0.83333
print(p-proc.time())
## user system elapsed
## -226.081 -4.184 -240.186
MCEM
seg = sim.seg(0.8,0.1,20)
obs = seg$obs
obs
time = proc.time()
pilot.chan(obs,20,0.0001,m1=10)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=10)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)
time = proc.time()
pilot.chan(obs,20,0.0001,m1=100)
print(proc.time()-time)
time = proc.time()
p=pilot.chan(obs,20,0.0001,m1=30)
mcem.seg(obs,ep=0.0001,ct=20,p=p)
print(proc.time()-time)
Experiment
time = proc.time()
n = 50
L = vector(mode="numeric",length = n)
P = vector(mode="numeric",length = n)
for(i in 1:n){
seg = sim.seg(0.8,0.1,20)
obs = seg$obs
p=pilot.chan(obs,20,0.0001,m1=20)
mcem = mcem.seg(obs,ep=0.0001,ct=20,p=p)
print(mcem)
L[i] = mcem$L[length(mcem$L)]
P[i] = mcem$P[length(mcem$P)]
}
print(proc.time()-time)
summary(L)
summary(P)