Our estimator \(\widetilde{S}(x)\) is computed from INE(Iterative Nelson Estimator) approach,you can see the approach method in section2.we now asses their performane using Monte Carlo simulation
\[ Xi\stackrel{i.i.d}{\sim} Gamma( 2,1) \quad i=1,...,n \] \[ Ti\stackrel{i.i.d}{\sim} Unif( 0,4) \quad i=1,...,n \]
\[ n=100\quad sample \ size \]
Thus,we only know that \(Li<=Xi<=Ui\) iff \(Xi>=Ti\).It is possible that \(Ui= \infty\)
Write a function called “samplemaker_gamma” to make sample
samplemaker_gamma <- function(n=100,shape=2,scale=1,end=4 ){
x <- rgamma(n,shape = 2,scale = 1)
t <- runif(n,0,4)
delta <- 0.5
M <- 1
observedx <- x[which(x>t)]
observedt <- t[which(x>t)]
data <- data.frame(observedx,observedt)
status <- ifelse( observedx<=observedt+M*delta, 3 , 0) #maybe change
qi <- ifelse(status==3, observedt ,observedt+M*delta )
pi <- ifelse( status==3, observedt+M*delta , Inf )
datause <- data.frame(observedx,observedt ,Li=qi,Ui=pi,status,entry=observedt)
return(datause)
}
samplemaker_weibull<- function(n=130,shape=4,scale=1,end=1.5 ){
#n <- 500
#shape <- 4
#scale <- 1
x <- rweibull(n,shape,scale)
t <- runif(n,0,end)
delta <- 0.5
M <- 1
observedx <- x[which(x>t)]
observedt <- t[which(x>t)]
status <- ifelse( observedx<=observedt+M*delta, 3 , 0) #maybe change
qi <- ifelse(status==3, observedt ,observedt+M*delta )
pi <- ifelse( status==3, observedt+M*delta , Inf )
datause <- data.frame(observedx,observedt ,Li=qi,Ui=pi,status,entry=observedt)
return(datause)
}
In each replication:
Draw the true Survival function of Xi and the mean of \(\widetilde{S}(x)\) and its lower and upper 5% quantiles of the estimates
Our INE approach includes 5 step:
step 1:To construct the estimator \(\widetilde{S}(x)\) , let \(0 = \tau_0 < \tau_1 < \tau_2 < ...< \tau_m\) be a grid of time which includes all the points \(Li,Ui\) from data.Thus,we have m interval.This method is maded by Giolo.
step 2:For i=1,…,n. For the ith observation, define a weight \(\alpha_{ij}\) to be \(1\) if the interval \((\tau_{j-1},\tau_{j}]\) is contained in the interval \((L_i,U_i]\) and 0,otherwise.
step 3:Let \(r=0\);give an intitila estimate \(\widetilde{S}^{(0)}\) of the survival function
step 4:Under the current estimate of the survival function \(\widetilde{S}^{(r)}\), compute \(d_j\) , the expected number of deaths in each interval as in Turnbull’s self-consistency algorithm. Let \(R_j=\sum_{i\geq j}d_i\)
\[ p_j=S(\tau_{j-1})^-S(\tau_{j})\quad j=1,...,m\]
\[ d_j=\sum_{i=1}^{n}\frac{\alpha_{ij}p_j}{\sum_{k=1}^{m}\alpha_{ik}p_k}\quad j=1,...,m\]
datatest is a test data (n=7)
set.seed(5)
datatest <- samplemaker_gamma(n=7)
print("datatest")
## [1] "datatest"
datatest
## observedx observedt Li Ui status entry
## 1 4.3283 2.8828 3.3828 Inf 0 2.8828
## 2 0.8525 0.8454 0.8454 1.345 3 0.8454
## 3 1.6879 0.9029 1.4029 Inf 0 0.9029
## 4 0.6517 0.5599 0.5599 1.060 3 0.5599
cria.tau <- function(data){
l <- data$Li
r <- data$Ui
tau <- sort(unique(c(l,r[is.finite(r)])))
#tau <- c( 0,tau,100000)
return(tau)
}
print("tau")
## [1] "tau"
cria.tau( datatest )
## [1] 0.5599 0.8454 1.0599 1.3454 1.4029 3.3828
write a fucntion do step 1 continue
makeinterval <- function(data){
x <- matrix( c(0,rep(cria.tau( data ),rep(2,length( cria.tau( data ) ))) ,100000 )
,ncol = 2 ,byrow = T)
q <- x[,1]
p <- x[,2]
data.frame(q,p)
}
we show the interval in datatest
makeinterval(datatest)
## q p
## 1 0.0000 5.599e-01
## 2 0.5599 8.454e-01
## 3 0.8454 1.060e+00
## 4 1.0599 1.345e+00
## 5 1.3454 1.403e+00
## 6 1.4029 3.383e+00
## 7 3.3828 1.000e+05
make_A_turnbull <- function(datacheck){
qj<- makeinterval(datacheck)$q
pj<- makeinterval(datacheck)$p
li <- datacheck$Li
ri <- datacheck$Ui
n <- length(li)
m <- length(pj)
A <- matrix(0,nrow = n,ncol = m)
intmapL <- signif(qj,4)
intmapR <- signif(pj,4)
k <- dim(A)[[2]]
Lbracket <- rep("(", k)
Rbracket <- rep(")", k)
intname <- paste(Lbracket, intmapL, ",", intmapR, Rbracket,
sep = "")
intmapL2 <- signif(datacheck$Li, 4)
intmapR2 <- signif(datacheck$Ui, 4)
k2 <- dim(A)[[1]]
Lbracket2 <- rep("(", k2)
Rbracket2 <- rep(")", k2)
intname2 <- paste(Lbracket2, intmapL2, ",", intmapR2, Rbracket2,
sep = "")
for(j in 1:m){
for( i in 1:n){
if( li[i]<=qj[j]&&ri[i]>=pj[j] ){
A[i,j] <- 1
}
else{A[i,j] <- 0}
}
}
colnames(A) <- intname
rownames(A) <- intname2
return(A)
}
we show the corrsponding \(\alpha_{ij}\) in datatest
datatest_A <- make_A_turnbull(datatest)
datatest_A
## (0,0.5599) (0.5599,0.8454) (0.8454,1.06) (1.06,1.345)
## (3.383,Inf) 0 0 0 0
## (0.8454,1.345) 0 0 1 1
## (1.403,Inf) 0 0 0 0
## (0.5599,1.06) 0 1 1 0
## (1.345,1.403) (1.403,3.383) (3.383,1e+05)
## (3.383,Inf) 0 0 1
## (0.8454,1.345) 0 0 0
## (1.403,Inf) 0 1 1
## (0.5599,1.06) 0 0 0
\[ p_j=S(\tau_{j-1})^-S(\tau_{j})\quad j=1,...,m\]
\[ d_j=\sum_{i=1}^{n}\frac{\alpha_{ij}p_j}{\sum_{k=1}^{m}\alpha_{ik}p_k}\quad j=1,...,m\]
#data <- samplemaker_gamma()
myfunction_turnbull<- function(data,iter.max=2000){
A <- Aintmap(L= data$Li,R=data$Ui)$A
tau <-Aintmap(L= data$Li,R=data$Ui)$intmap[2,]
#A
s <- rep( 1/dim(A)[2], dim(A)[2] ) #give initail guess
eps=0.00001 #tolerance
n<-nrow(A) #number of observation
m<-ncol(A) #number of interval
S=cumsum(s)
S=S[m:1]
#S
Q<-matrix(1,m)
#Q
#length(S)
#dim(A)
iter <- 0
repeat {
iter <- iter + 1
diff<- (Q-S)
#diff
maxdiff<-max(abs(as.vector(diff)))
#maxdiff
if (maxdiff<eps|iter>=iter.max)
break
Q <- S
#Q
#s
#length(p)
#dim(A)
#s
den <- rep(0,n)
densum <- rep(0,n)
d<- rep(0,m)
###compute expected death
for(j in 1:m){
for(i in 1:n){
if(A[i,j]>0){
den[i]<- sum(A[i,]*s)
densum[i]<- A[i,j]*s[j]/den[i]
d[j]<- sum( densum )
}
}
}
###compute expected death
#d
Rj <-NULL
for(j in 1:m){
Rj [j]<- sum( d[j:m] )
}
lamdatelta <- d/Rj
#lamdatelta
cumlatedhazard<- cumsum(lamdatelta)
#cumlatedhazard
S<-exp(-cumlatedhazard )
#S
SS<- S
SS[m] <- 0
#c(1,S[1:m-1])-SS
s <- c(1,S[1:m-1])-SS
#s
#sum(s)
S[1] <- 1
S[m] <- 0
}
#tau
c <- data.frame(tau,S)
return(c)
}
b <- myfunction_turnbull( samplemaker_gamma(n=100))
plot(b$tau,b$S,type = "l",lty=2,c(0,5),xlab = "age",ylab="S(x)")
x = seq(0, 5, 0.01)
shape = 2
rate = 1
cdf2 = pgamma(x, shape, scale = 1)
lines(x,1-cdf2)
a <- myfunction_turnbull( samplemaker_weibull( ))
plot(a$tau,a$S,type = "l",lty=2,xlim = c(0,2),xlab = "age",ylab="S(x)")
curve(exp(-(x/1)^4),from=0,to=2,add = T)
the function do the interpolation
myexperiment_gamma=function(n=100 ){
xout <- seq(0,5,0.01)
data <- samplemaker_gamma(n)
aa <- myfunction_turnbull(data)
#plot(aa$tau,aa$S,type = "l",col="blue",lty=2)
#lines(x,1-cdf,col="green")
#length(aa$tau)
#length(aa$p)
approx_y <- approx(aa$tau,aa$S, xout=xout,yleft=1 ,yright=0 )
approx_y <- approx_y$y
#plot(xout,approx_y ,type = "l",col="blue",lty=2 )
#lines(x,1-cdf,col="green")
return(approx_y)
}
R=500
x = seq(0, 5, 0.01)
# Now define the parameters of your gamma distribution
shape = 2
rate = 1
# Now calculate points on the cdf
cdf = pgamma(x, shape, rate)
# Shown plotted here
x = seq(0, 5, 0.01)
par(mfrow=c(1,1))
xout <- seq(0,5,0.01)
dd <- replicate(n=R, myexperiment_gamma(n=100 ))
order <- t(apply(dd,1,sort))
mean <- apply(order,1,mean)
plot(xout,mean,type = "l",col="blue",lty=2,main="gamma(shape=2,sclae=1,R=500,n=100)",xlim = c(0,5),ylim = c(0,1))
lines(x,1-cdf,col="green")
lines(xout,order[,round(R*0.025)] , col="blue",lty=3 )
#lines(xout,order[,round(R*0.2)] , col="blue",lty=3)
lines(xout,order[,round(R*0.975)] , col="blue",lty=3 )