#simulate segments, the output are waiting times, topology and observed waiting times
sim.seg <- function(lambda,p,n=1){
to = NULL
t = NULL
obs = NULL
for(i in 1:n){
to1=0
while(to1 == 0){
to1 = rbinom(1,1,p)
to = c(to,to1)
t = c(t,rexp(1,lambda))
}
obs = c(obs,sum(t)-sum(obs))
}
return(list(t=t,to=to,obs=obs))
}
# simulate missing data given the observed segment
rec.seg <- function(obs,lambda,p){
to=NULL
t=NULL
for(i in 1:length(obs)){
Dt = obs[i]
n = rpois(1,(1-p)*lambda*Dt)
it = sort(runif(n,min=0,max=Dt)) #inter-arrival times
t = c(t,diff(c(0,it,Dt)))
to = c(to,rep(0,times=n),1)
}
return(list(t=t,to=to))
}
#calculate the mle for a set of segments
mle.ss <- function(S){
m = length(S)
n = vector(mode='numeric',length = m)
d = vector(mode='numeric',length = m)
dt = vector(mode='numeric',length = m)
for(i in 1:m){
rec = S[[i]]
d[i] = length(rec$t)
n[i] = sum(rec$to)
dt[i] = sum(rec$t)
}
lambda.mle = sum(d)/sum(dt)
p.mle = sum(n)/sum(d)
return(list(lambda.mle=lambda.mle,p.mle=p.mle))
}
# monte-carlo sampling of size m
sim.srs <- function(Dt,lambda,p,m){
Rec = vector(mode = 'list',length = m)
d = vector(mode='numeric',length = m)
dt = vector(mode='numeric',length = m)
for(j in 1:m){
rec=rec.seg(Dt,lambda,p)
Rec[[j]] = rec
}
return(Rec)
}
lambda = 0.8
p = 0.5
#create random segment
seg = sim.seg(lambda = 0.8,p = 0.5)
#segment
seg
## $t
## [1] 0.3044863
##
## $to
## [1] 1
##
## $obs
## [1] 0.3044863
#observed
Dt = seg$obs
m=10000
# monte-carlo sampling
R = sim.srs(Dt,lambda,p,m)
# mle
mle.ss(R)
## $lambda.mle
## [1] 3.690478
##
## $p.mle
## [1] 0.8899172
# now with more segments
seg = sim.seg(lambda = 0.8,p = 0.5,n=25)
Dt=seg$obs
srs = sim.srs(Dt,lambda,p,m)
mle.ss(srs)
## $lambda.mle
## [1] 0.8661497
##
## $p.mle
## [1] 0.5379375
time = proc.time()
l=100
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
lambda=0.8
p=0.8
m=100
for(k in 1:l){
seg = sim.seg(lambda = lambda, p = p,n=40)
Dt = seg$obs
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = mle$lambda.mle
P[k] = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 18.804 0.004 18.805
summary(L)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6213 0.7267 0.8026 0.8142 0.8880 1.1734
summary(P)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7361 0.7787 0.8046 0.7991 0.8195 0.8598
Quite accurate estimations, now let´s observe another with a lot of missing data
time = proc.time()
l=100
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
lambda=0.8
p=0.01
m=100
for(k in 1:l){
seg = sim.seg(lambda = lambda, p = p,n=40)
Dt = seg$obs
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = mle$lambda.mle
P[k] = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 42.408 0.024 42.434
summary(L)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7958 0.7988 0.8003 0.8004 0.8016 0.8080
summary(P)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.006423 0.009104 0.010308 0.010391 0.011778 0.016563
In general they are not biased and accurate. We can change the mc sampling size \(m\), the length of the tree \(n\) or the parameters, but they are more or less working fine. Now let´s see how the MCEM works.
mcem.light <- function(Dt,lambda,p,nit=100){
m=2
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
m=m+1
}
return(list(L=L,P=P))
}
From now onwards I perform several MCEM routines to observe how is working with different input parameters.
time = proc.time()
l=300
lambda=0.8
p=0.5
seg = sim.seg(lambda = lambda, p = p,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 104.456 0.012 104.465
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.5
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 107.852 0.004 107.853
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.5
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 108.536 0.000 108.531
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.5
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 107.212 0.004 107.218
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.5
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 107.140 0.000 107.136
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.1
seg = sim.seg(lambda = lambda, p = p,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 122.004 0.000 121.995
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.1
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 122.104 0.008 122.113
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.9
seg = sim.seg(lambda = lambda, p = p,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 103.856 0.000 103.855
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.5
#seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 105.464 0.000 105.465
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
time = proc.time()
l=300
lambda=0.8
p=0.99
seg = sim.seg(lambda = 0.8, p = 0.5,n=50)
Dt = seg$obs
mcem = mcem.light(Dt,lambda,p,m)
print(proc.time()-time)
## user system elapsed
## 102.188 0.000 102.189
qplot(1:l,mcem$L) + geom_hline(yintercept = lambda)
qplot(1:l,mcem$P) + geom_hline(yintercept = p)
m=100
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 49.056 0.024 49.092
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.5)
P*L
## [1] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [8] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [15] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [22] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [29] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [36] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [43] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [50] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [57] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [64] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [71] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [78] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [85] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [92] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [99] 0.3568474 0.3568474
length(Dt)/sum(Dt)
## [1] 0.3568474
m=10
lambda=1.2
p=0.1
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 2.320 0.076 2.433
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.5)
P*L
## [1] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [8] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [15] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [22] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [29] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [36] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [43] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [50] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [57] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [64] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [71] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [78] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [85] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [92] 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474 0.3568474
## [99] 0.3568474 0.3568474
length(Dt)/sum(Dt)
## [1] 0.3568474
time = proc.time()
l=100
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
lambda=0.8
p=0.5
m=10
seg = sim.seg(lambda = 0.8, p = 0.5,n=100)
Dt = seg$obs
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 5.160 0.000 5.168
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.5)
qplot(1:length(P),P*L) + geom_hline(yintercept = 0.5*0.8)
time = proc.time()
l=100
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
lambda=0.8
p=0.5
m=10
seg = sim.seg(lambda = 0.8, p = 0.5,n=100)
Dt = seg$obs
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 5.792 0.000 5.805
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.5)
qplot(1:length(P),P*L) + geom_hline(yintercept = 0.5*0.8)
time = proc.time()
l=100
L=vector(mode = 'numeric',length=l)
P=vector(mode = 'numeric',length=l)
lambda=0.8
p=0.5
m=100
seg = sim.seg(lambda = 0.8, p = 0.5,n=100)
Dt = seg$obs
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k] = lambda = mle$lambda.mle
P[k] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 48.676 0.008 48.728
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.5)
qplot(1:length(P),P*L) + geom_hline(yintercept = 0.5*0.8)
time = proc.time()
l=1000
L=vector(mode = 'numeric',length=(l+1))
P=vector(mode = 'numeric',length=(l+1))
L[1] = lambda=0.8
P[1] = p = 0.5
m=10
seg = sim.seg(lambda = 0.8, p = 0.1,n=100)
Dt = seg$obs
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k+1] = lambda = mle$lambda.mle
P[k+1] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 53.200 0.000 53.271
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.1)
qplot(1:length(P),P*L) + geom_hline(yintercept = 0.1*0.8)
qplot(1:(length(P)-1),diff(L))
time = proc.time()
l=100
L=vector(mode = 'numeric',length=(l+1))
P=vector(mode = 'numeric',length=(l+1))
L[1] = lambda=0.8
P[1] = p = 0.5
m=100
seg = sim.seg(lambda = 0.8, p = 0.1,n=100)
Dt = seg$obs
for(k in 1:l){
srs = sim.srs(Dt,lambda,p,m)
mle=mle.ss(srs)
L[k+1] = lambda = mle$lambda.mle
P[k+1] = p = mle$p.mle
}
print(proc.time()-time)
## user system elapsed
## 55.272 0.004 55.303
qplot(1:length(P),L) + geom_hline(yintercept = 0.8)
qplot(1:length(P),P) + geom_hline(yintercept = 0.1)
qplot(1:length(P),P*L) + geom_hline(yintercept = 0.1*0.8)
qplot(1:(length(P)-1),diff(L))
# NHPP
nhpp.missing <- function(pars,ct=15){
lambda_0 = pars[1]
mu_0 = pars[2]
t_max <- ct
t <- 0
s <- 0
lambda <- function(t) lambda_0*(1-exp(-mu_0*(ct-t)))
Lambda <- function(tupper) integrate(f = lambda, lower = 0, upper = tupper)$value
Lambda_inv <- function(s){
v <- seq(0,t_max+3, length.out = 1000)
min(v[Vectorize(Lambda)(v)>=s])
}
X <- numeric(0)
while(t <= t_max){
u <- rexp(1)
s <- s + u
if(s > Lambda(ct)) break
t <- Lambda_inv(s)
X <- c( X, t)
}
return(X)
}