\[\hat{\theta}_{mle} = \arg\max_{\theta}\, l(\theta) = \frac{\sum_{i=1}^n t_i}{n} = \bar{t}\]
\[l(\theta) = -n\log\theta - \frac{1}{\theta}\sum_{i=1}^n t_i\]
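Setting the score \(l'(\theta)\) to zero shows where this closed form comes from:
\[l'(\theta) = -\frac{n}{\theta} + \frac{1}{\theta^2}\sum_{i=1}^n t_i = 0 \quad\Longrightarrow\quad \hat{\theta} = \frac{\sum_{i=1}^n t_i}{n} = \bar{t}\]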
# data
exact.fails <- c(17.88, 28.92, 33, 41.52, 42.12, 45.60, 48.40, 51.84, 51.96,
54.12, 55.56, 67.80, 68.64, 68.64, 68.88, 84.12, 93.12, 98.64,
105.12, 105.84, 127.92, 128.04, 173.40)
mle.theta <- mean(exact.fails)
mle.theta
## [1] 72.22087
# Function as input of optim()
neg.log.likelihood.exp <- function(theta, data){
  n <- length(data)
  # log-likelihood of the one-parameter exponential distribution
  l <- -n*log(theta) - 1/theta * sum(data)
  # return its negative so that optim() can minimize
  return(-l)
}
est.theta <- optim(100, neg.log.likelihood.exp,
data = exact.fails, method = "Brent", lower = 0, upper = 200)
par1<- est.theta$par
par1
## [1] 72.22087
log_l<- neg.log.likelihood.exp(par1, exact.fails)
The theoretical estimate \(\hat {\theta}_{mle} = \bar{t} = 72.22087\) agrees with the numerical ML estimate returned by \(optim()\).
\[l(\theta) = \log L(\theta) = \sum_{j=1}^m d_j\log[F(t_j;\theta) - F(t_{j-1};\theta)]\]
\[= \sum_{j=1}^m d_j\log\left[\exp\left(-\frac{t_{j-1}}{\theta}\right) - \exp\left(-\frac{t_j}{\theta}\right)\right]\]
In the \(Likelihood.exp()\) function, I add the constant 3349.759 to \(\rm sum(lj)\) before exponentiating, to keep the output from collapsing to 0. The reason is that the log-likelihood for \(\rm n2000\) is so strongly negative that it exceeds the computing limit of the R function \(\exp()\): the likelihood value is so small that \(\exp()\) simply returns 0 for every \(\theta\), so the maximizing \(\hat{\theta}\) cannot be located. Adding a constant to the log-likelihood multiplies the likelihood by a fixed positive factor, which leaves the maximizer unchanged.
Since we already know that the \(\rm n200\) likelihood lies inside the computable range of \(\exp()\), I chose the constant so that the maximum of the shifted \(\rm n2000\) likelihood equals the maximum of the \(\rm n200\) likelihood.
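A minimal sketch of the underflow issue and why the shift is harmless (the log-likelihood values below are made up for illustration):
# exp() underflows to exactly 0 once its argument drops below about -745
exp(-745)  # ~ 4.9e-324, the smallest positive double
exp(-746)  # 0 -- underflow
# Adding a constant c to the log-likelihood rescales L(theta) by exp(c),
# so the location of the maximum is unchanged
ll <- c(-3400, -3350, -3360)                    # hypothetical log-likelihoods
which.max(exp(ll + 3349.759)) == which.max(ll)  # TRUE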
# data
# interval endpoints (t_{j-1}, t_j]; 10^10 stands in for infinity
time <- matrix(c(0, 100, 300, 500, 700, 1000, 2000, 4000, 100, 300, 500, 700, 1000,
                 2000, 4000, 10^10), ncol = 2)
n200 <- c(41, 44, 24, 32, 29, 21, 9, 0) # dj: failure counts per interval
n20 <- c(3, 7, 4, 1, 3, 2, 0, 0)
n2000 <- c(292, 494, 332, 236, 261, 308, 73, 4) # dj
# Likelihood function
Likelihood.exp <- function(theta, time, dj){
  lj <- rep(NA, nrow(time))
  for(j in 1:nrow(time)){
    # log-likelihood contribution of interval j (interval-censored data)
    lj[j] <- dj[j]*log(exp(-time[j,1]/theta)-exp(-time[j,2]/theta))
  }
  # shift by a constant before exponentiating to avoid underflow to 0
  return(exp(sum(lj)+3349.759))
}
# run the 'Likelihood.exp'
thetax <- seq(400, 800, 0.01)
likelihood.y <- sapply(thetax, Likelihood.exp, time = time, dj = n2000)
indx.max <- which.max(likelihood.y)
ml.y <- thetax[indx.max]
ml.y
## [1] 612.77
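A grid search is simple but slow; as a sketch, R's built-in \(optimize()\) locates the same maximizer directly (extra arguments are passed through to the likelihood function defined above):
# one-dimensional maximization without a grid (should agree with ml.y)
opt <- optimize(Likelihood.exp, interval = c(400, 800),
                time = time, dj = n2000, maximum = TRUE)
opt$maximum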
# Function as input of optim()
# redefine neg.log.likelihood.exp for interval-censored counts
neg.log.likelihood.exp <- function(theta, time, dj){
  lj <- rep(NA, nrow(time))
  for(j in 1:nrow(time)){
    # interval probability F(t_j) - F(t_{j-1}) under Exp(theta), via pexp()
    lj[j] <- dj[j]*log(pexp(time[j,2], rate = 1/theta)-pexp(time[j,1], rate = 1/theta))
  }
  return(-sum(lj))
}
# run the 'neg.log.likelihood.exp'
theta.hat <- optim(550, neg.log.likelihood.exp, time = time, dj = n2000,
hessian = TRUE, method = "Brent", lower = 200, upper = 800)
theta.hat_2000<- theta.hat
theta.h_2000 <- theta.hat_2000$par
theta.h_2000
## [1] 612.7727
\[\hat{se}_\hat{\theta} = \sqrt {\left. \left[ \frac {-d^2l(\theta)}{d\theta^2}\right]^{-1}\right|_{\theta = \hat\theta}}\]
sd.theta <- sqrt(1/theta.hat_2000$hessian)  # inverse of the observed information (1x1 Hessian)
sd.theta
## [,1]
## [1,] 14.13289
plot(thetax, likelihood.y, type = "l",
main = "Likelihood function (Exponential distribution)")
lines(c(thetax[indx.max], thetax[indx.max]),
c(-1, Likelihood.exp(thetax[indx.max], time, n2000)), col = 2)
text(thetax[indx.max]+30, likelihood.y[indx.max], expression(hat(theta)))
text(thetax[indx.max]+76, likelihood.y[indx.max]*0.99,paste(" = ", thetax[indx.max]))
\[R(\theta) = \frac {L(\theta)}{L(\hat {\theta})}\]
\[-2\log R(\theta)\ \dot{\sim}\ \chi_{(1)}^2\]
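Inverting this statistic shows why the horizontal cutoff lines in the plots below sit at \(\exp(-\chi^2_{(1),1-\alpha}/2)\):
\[\left\{\theta : -2\log R(\theta) \le \chi^2_{(1),1-\alpha}\right\} = \left\{\theta : R(\theta) \ge e^{-\chi^2_{(1),1-\alpha}/2}\right\}\]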
# Likelihood ratio function
LR <- function(theta, theta.hat, time, dj){
  Likelihood.exp(theta, time, dj)/Likelihood.exp(theta.hat, time, dj)
}
# run the 'LR' for n2000
thetax <- seq(200, 1000, 0.02)
LR.theta_2000 <- sapply(thetax, LR, theta.hat = theta.h_2000, time = time, dj = n2000)
# plot
plot(thetax, LR.theta_2000, type = "l", main = expression(paste("Likelihood Ratio ", R(theta))))
lines(c(0, 1000), c(exp(-qchisq(0.95, 1)/2), exp(-qchisq(0.95, 1)/2)), lty = 2)
lines(c(0, 1000), c(exp(-qchisq(0.9, 1)/2), exp(-qchisq(0.9, 1)/2)), lty = 2)
lines(c(0, 1000), c(exp(-qchisq(0, 1)/2), exp(-qchisq(0, 1)/2)), col = 2, lty = 2)
text(c(900, 900), c(0.2, 0.3), c("95%", "90%"))
# Solutions to the intersections of the lines
LR.CI <- function(theta, theta.hat, time, dj, alpha = 0.05){
  # squared distance to the chi-square cutoff; minimized at a CI endpoint
  (LR(theta, theta.hat, time, dj) - exp(-qchisq(1-alpha, 1)/2))^2
}
L.95 <- optim(500, LR.CI, lower = 200, upper = 600, method = "Brent",
theta.hat = theta.h_2000, time = time, dj = n2000, alpha = 0.05)
U.95 <- optim(600, LR.CI, lower = 600, upper = 800, method = "Brent",
theta.hat = theta.h_2000, time = time, dj = n2000, alpha = 0.05)
c(L.95$par, U.95$par)
## [1] 585.8676 641.3072
c(theta.h_2000 - qnorm(0.975)*sd.theta, theta.h_2000 + qnorm(0.975)*sd.theta)
## [1] 585.0727 640.4726
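The next interval is the Wald interval on the log scale; by the delta method \(\hat{se}_{\log\hat{\theta}} \approx \hat{se}_{\hat{\theta}}/\hat{\theta}\), which gives endpoints that are always positive:
\[\left[\hat{\theta}/w,\ \hat{\theta}w\right], \qquad w = \exp\left(z_{0.975}\,\hat{se}_{\hat{\theta}}/\hat{\theta}\right)\]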
w <- exp(qnorm(0.975)*sd.theta/theta.h_2000)
c(theta.h_2000/w, theta.h_2000*w)
## [1] 585.6895 641.1083
estimates | n=2000 | n=200 | n=20 |
---|---|---|---|
ML estimate \(\hat {\theta}\) | 612.77 | 572 | 440 |
Standard Error \(\hat {se}_{\hat {\theta}}\) | 14.133 | 41.72 | 101 |
95% Confidence Intervals for \(\theta\) based on: | | | |
Likelihood | [585.87, 641.31] | [498, 662] | [289, 713] |
\(Z_{\log(\hat {\theta})}\ \dot{\sim}\ N(0,1)\) | [585.69, 641.11] | [496, 660] | [281, 690] |
\(Z_{\hat {\theta}}\ \dot{\sim}\ N(0,1)\) | [585.07, 640.47] | [491, 654] | [242, 638] |
Honestly, I think this turned out really well!
n_Likelihood.exp <- function(theta, time, dj){
  lj <- rep(NA, nrow(time))
  for(j in 1:nrow(time)){
    # log-likelihood contribution of interval j (no underflow shift needed here)
    lj[j] <- dj[j]*log(exp(-time[j,1]/theta)-exp(-time[j,2]/theta))
  }
  return(exp(sum(lj)))
}
LR_n <- function(theta, theta.hat, time, dj){
  n_Likelihood.exp(theta, time, dj)/n_Likelihood.exp(theta.hat, time, dj)
}
thetax <- seq(200, 1000, 0.02)
## ML estimate for n20
theta.hat <- optim(550, neg.log.likelihood.exp, time = time, dj = n20,
hessian = TRUE, method = "Brent", lower = 200, upper = 800)
theta.h_20 <- theta.hat$par
# Run the 'LR' for n20
LR.theta_20 <- sapply(thetax, LR_n, theta.hat = theta.h_20, time = time, dj = n20)
## ML estimate for n200
theta.hat <- optim(550, neg.log.likelihood.exp, time = time, dj = n200,
hessian = TRUE, method = "Brent", lower = 200, upper = 800)
theta.h_200 <- theta.hat$par
# Run the 'LR' for n200
LR.theta_200 <- sapply(thetax, LR_n, theta.hat = theta.h_200, time = time, dj = n200)
# Solutions to the intersections of the lines n=20
LR.CI_n <- function(theta, theta.hat, time, dj, alpha = 0.05){
  (LR_n(theta, theta.hat, time, dj) - exp(-qchisq(1-alpha, 1)/2))^2
}
L.95_20 <- optim(500, LR.CI_n, lower = 200, upper = 600, method = "Brent",
theta.hat = theta.h_20, time = time, dj = n20, alpha = 0.05)
U.95_20 <- optim(600, LR.CI_n, lower = 600, upper = 800, method = "Brent",
theta.hat = theta.h_20, time = time, dj = n20, alpha = 0.05)
# Solutions to the intersections of the lines n=200
L.95_200 <- optim(500, LR.CI_n, lower = 200, upper = 600, method = "Brent",
theta.hat = theta.h_200, time = time, dj = n200, alpha = 0.05)
U.95_200 <- optim(600, LR.CI_n, lower = 600, upper = 800, method = "Brent",
theta.hat = theta.h_200, time = time, dj = n200, alpha = 0.05)
# Solutions to the intersections of the lines n=2000
LR.CI <- function(theta, theta.hat, time, dj, alpha = 0.05){
  (LR(theta, theta.hat, time, dj) - exp(-qchisq(1-alpha, 1)/2))^2
}
L.95_2000 <- optim(500, LR.CI, lower = 200, upper = 600, method = "Brent",
theta.hat = theta.h_2000, time = time, dj = n2000, alpha = 0.05)
U.95_2000 <- optim(600, LR.CI, lower = 600, upper = 800, method = "Brent",
theta.hat = theta.h_2000, time = time, dj = n2000, alpha = 0.05)
# plot
plot(thetax, LR.theta_20, ylim = c(0, 1.2), type = "l", xlab = expression(theta),
     ylab = "Relative Likelihood", main = expression(paste("Likelihood Ratio ", R(theta))))
lines(thetax, LR.theta_200, lty = 3)
lines(thetax, LR.theta_2000, lty = 4)
lines(c(0, 1100), c(exp(-qchisq(0.95, 1)/2), exp(-qchisq(0.95, 1)/2)), col = 2)
lines(c(596, 596), c(-10, 10))
lines(c(L.95_20$par,L.95_20$par), c(-1, exp(-qchisq(0.95, 1)/2)))
lines(c(U.95_20$par,U.95_20$par), c(-1, exp(-qchisq(0.95, 1)/2)))
lines(c(L.95_200$par,L.95_200$par), c(-1, exp(-qchisq(0.95, 1)/2)))
lines(c(U.95_200$par,U.95_200$par), c(-1, exp(-qchisq(0.95, 1)/2)))
lines(c(L.95_2000$par,L.95_2000$par), c(-1, exp(-qchisq(0.95, 1)/2)))
lines(c(U.95_2000$par,U.95_2000$par), c(-1, exp(-qchisq(0.95, 1)/2)))
text(950, exp(-qchisq(0.95, 1)/2)+0.06, "95%")
text(c(300, 470, 700), c(0.5, 0.4, 0.85), c("n=20", "n=200", "n=2000"), cex = 0.6, col = 6)
text(410, 1.15, "estimate of mean using all n=10200 times ->", cex = 0.5)
exact.fails <- c(17.88, 28.92, 33, 41.52, 42.12, 45.60, 48.40, 51.84, 51.96, 54.12, 55.56, 67.80, 68.64, 68.64, 68.88, 84.12, 93.12, 98.64,
105.12, 105.84, 127.92, 128.04, 173.40)
Fit the two-parameter exponential distribution (\(\theta\), \(\gamma\)). Give the point estimates.
\[l(\theta, \gamma) = -n\log\theta - \frac{1}{\theta}\sum_{i=1}^n (t_i - \gamma) = -\frac {\sum_{i=1}^n t_i}{\theta} + \frac {n\gamma}{\theta} -n\log\theta, \qquad \gamma \le \min_i t_i\]
\[\hat {\gamma}_{mle} = t_{(1)} = 17.880\]
\[\hat {\theta}_{mle} = \bar{t} - \hat{\gamma}_{mle} \approx 54.347\]
neg.log.likelihood.2exp <- function(par, data){
  theta <- par[1]
  gamma <- par[2]
  n <- length(data)
  if (gamma > min(data)){
    # gamma must not exceed the smallest observation; penalize otherwise
    l <- -1000000
  }else{
    # log-likelihood of the two-parameter exponential distribution
    l <- -n*log(theta) - 1/theta * sum(data) + n*gamma/theta
  }
  return(-l)
}
scale<- mean(exact.fails)
location<- min(exact.fails)
est.theta2 <- optim(c(scale, location), neg.log.likelihood.2exp, data = exact.fails)
par2<- est.theta2$par
par2
## [1] 54.34697 17.88000
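As a sanity check, the two-parameter exponential has closed-form MLEs \(\hat{\gamma} = t_{(1)}\) and \(\hat{\theta} = \bar{t} - \hat{\gamma}\); the short sketch below should reproduce the \(optim()\) result up to optimizer tolerance.
# closed-form MLEs for the two-parameter exponential
gamma.hat <- min(exact.fails)               # 17.88
theta.hat <- mean(exact.fails) - gamma.hat  # ~ 54.34
c(theta.hat, gamma.hat)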
Use the AIC to compare which model is better (one- or two parameter distribution).
\[AIC = -2\log L(\hat{\theta}) + 2p\]
\(p\) is the number of model parameters.
\[AIC_{1pexp} = 244.8675\]
\[AIC_{2pexp} = 233.7827\]
According to the output, the two-parameter exponential distribution fits the data better, in the sense of a lower AIC. This is not surprising: the extra location parameter gives the model more flexibility to follow the data. It also explains why searches for the two-parameter exponential distribution so often lead to reliability-analysis topics.
AIC_1 <- 2*log_l + 2*length(par1)  # log_l already stores the negative log-likelihood
AIC_2 <- 2*neg.log.likelihood.2exp(par2, exact.fails) + 2*length(par2)
AIC_1 ; AIC_2
## [1] 244.8675
## [1] 233.7827
# plot
library(tolerance)
plot(density(exact.fails), ylim = c(0, 0.015), xlab = "x", ylab = "prob", main = "density of exact.fails")
curve(dexp(x, rate = 1/par1), col = 3, add = T)
curve(d2exp(x, rate = par2[1], shift = par2[2]), col = 2, add = T)
legend("topright", legend = c("1pexp", "2pexp"), col = c(3, 2), lty = 1)
Intuitively, the lognormal model says that the observed data follow a normal distribution after a log transformation.
\[F(t) = \Phi_{nor}(\frac {\log t-\mu}{\sigma})\]
\[f(t) = \frac {1}{\sigma t}\phi_{nor}(\frac {\log t-\mu}{\sigma}) = \frac {1}{\sigma t\sqrt{2\pi}}\exp(-\frac{1}{2}(\frac {\log t - \mu}{\sigma})^2)\]
\(\mu = \rm {location\ parameter}\)
\(\sigma = \rm {shape\ parameter}\)
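As a small numerical check of the CDF identity above (the parameter values here are arbitrary, chosen only for illustration):
# plnorm() and the standardized pnorm() form agree
mu <- 10; sigma <- 0.5; t <- 10000
all.equal(plnorm(t, mu, sigma), pnorm((log(t) - mu)/sigma))  # TRUE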
Based on the dataset \(ShockAbsorber.txt\) on moodle, use the lognormal distribution \(\rm {LN}(\mu, \sigma)\) to fit the data by the maximum likelihood (ML) method and provide the following estimates.
shock.absorber <- read.table("ShockAbsorber.txt", header = T)
shock.absorber$Censor <- ifelse(shock.absorber$Censor == "Failure", 1, 0)
head(shock.absorber)
## Distance Mode Censor
## 1 6700 Mode1 1
## 2 6950 Censored 0
## 3 7820 Censored 0
## 4 8790 Censored 0
## 5 9120 Mode2 1
## 6 9660 Censored 0
\[\hat{\mu} = 10.145\]
\[\hat{\sigma} = 0.530\]
log.likelihood.ln <- function(mu, sigma, data){
  lj <- c()
  for(j in 1:length(data[,1])){
    # failures contribute the log density; right-censored units contribute log S(t)
    likelihood <- data[j, 3]*dlnorm(data[j, 1], mu, sigma, log = TRUE) +
      (1-data[j, 3])*log(1-plnorm(data[j, 1], mu, sigma))
    lj <- c(lj, likelihood)
  }
  return(sum(lj))
}
minus.log.likelihood.ln <- function(pars, data){
  # guard: for non-positive parameters dlnorm()/plnorm() return NaN, so penalize instead
  if (pars[1]<=0 | pars[2]<=0){
    return(100000)
  } else {
    return(-log.likelihood.ln(pars[1], pars[2], data))
  }
}
mu0 <- 10
sig0 <- 0.5
initial <- c(mu0, sig0)
par.hat <- optim(initial, minus.log.likelihood.ln, data = shock.absorber, hessian = TRUE)
est.par <- c(par.hat$par[1], par.hat$par[2])
est.par
## [1] 10.1447348 0.5301075
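As a cross-check (a sketch, assuming the survival package is installed), \(survreg()\) fits the same right-censored lognormal model; its intercept and scale should match est.par closely:
library(survival)
fit <- survreg(Surv(Distance, Censor) ~ 1, data = shock.absorber,
               dist = "lognormal")
c(coef(fit), fit$scale)  # expect roughly 10.145 and 0.530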
\[\hat{t}_{0.1} = \exp(\hat {\mu} + \Phi^{-1}_{nor}(0.1)\hat {\sigma}) = 12905.06\]
\[\hat F(10000) = \Phi_{nor}(\frac {\log 10000-\hat {\mu}}{\hat {\sigma}}) = 0.0389797\]
# t_p
t_0.1<- exp(est.par[1]+ qnorm(0.1)*est.par[2])
t_0.1
## [1] 12905.06
# F(10000)
F_10000<- pnorm((log(10000)-est.par[1])/est.par[2])
F_10000
## [1] 0.0389797
# plot
plot(density(shock.absorber$Distance[which(shock.absorber$Censor==1)]), main = "", xlab = "t")
curve(dlnorm(x, est.par[1], est.par[2]), add = T, col =2)
lines(c(t_0.1, t_0.1), c(0, dlnorm(t_0.1, est.par[1], est.par[2])), col = 3)  # sdlog, not sdlog^2
\[\hat {se}_{\hat {\mu}} = 0.1441874\]
\[\hat {se}_{\hat {\sigma}} = 0.1126943\]
cov.var.M<- round(solve(par.hat$hessian), 5)
se_mu<- sqrt(cov.var.M[1,1])
se_sigma<- sqrt(cov.var.M[2,2])
se_mu ; se_sigma
## [1] 0.1441874
## [1] 0.1126943
\[c = \left[ \begin{matrix} \frac {\partial t_p}{\partial \mu} \\ \frac {\partial t_p}{\partial \sigma} \end{matrix} \right] = \left. \left[ \begin{matrix} t_p \\ t_p\Phi^{-1}_{nor}(p) \end{matrix} \right]\right| _{\begin{matrix}\mu = \hat{\mu} \\ \sigma = \hat {\sigma} \\ p = 0.1\end{matrix}}\]
\[\hat {Var}_{\hat{t}_p} = \hat {c}^T \hat{\Sigma}_{\hat {\mu},\hat {\sigma}}\, \hat {c}\]
\[\hat {se}_{\hat{t}_p} = \sqrt{\hat {Var}_{\hat{t}_p}} = 1666.879\]
c <- matrix(t_0.1*c(1, qnorm(0.1)), nrow = 2)
se_t0.1<- as.numeric(sqrt(t(c)%*%cov.var.M%*%c))
se_t0.1
## [1] 1666.879
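The hand-derived gradient can also be checked numerically (a sketch, assuming the numDeriv package is installed; t.p.fun is a hypothetical helper defined only for this check):
library(numDeriv)
t.p.fun <- function(p) exp(p[1] + qnorm(0.1)*p[2])  # t_p as a function of (mu, sigma)
c.num <- grad(t.p.fun, est.par)
sqrt(t(c.num) %*% cov.var.M %*% c.num)  # ~ 1666.879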
\[c = \left[ \begin{matrix} \frac {\partial F(t)}{\partial \mu} \\ \frac {\partial F(t)}{\partial \sigma} \end{matrix} \right] = \left. \left[ \begin{matrix} \frac {\phi_{nor}(z)}{-\hat {\sigma}} \\ \frac {\phi_{nor}(z)z}{-\hat {\sigma}} \end{matrix} \right]\right| _{\begin{matrix}\mu = \hat{\mu} \\ \sigma = \hat {\sigma} \\ t = 10000\end{matrix}} ,\ \ \ \ z = \frac {\log(10000) - \hat{\mu}}{\hat {\sigma}}\]
\[\hat {Var}_{\hat {F}(t)} = \hat {c}^T \hat{\Sigma}_{\hat {\mu},\hat {\sigma}}\, \hat {c}\]
\[\hat {se}_{\hat {F}(t)} = \sqrt{\hat {Var}_{\hat {F}(t)}} = 0.02562305\]
z<- (log(10000) - est.par[1])/est.par[2]
c <- matrix(dnorm(z)/-est.par[2]*c(1, z), nrow = 2)
se_F10000<- as.numeric(sqrt(t(c)%*%cov.var.M%*%c))
se_F10000
## [1] 0.02562305
\(\mu\)
mu_l<- est.par[1] - qnorm(0.975)*se_mu
mu_u<- est.par[1] + qnorm(0.975)*se_mu
c(mu_l, mu_u)
## [1] 9.862133 10.427337
\(\sigma\)
w <- exp(qnorm(0.975)*se_sigma/est.par[2])
sigma_l<- est.par[2]/w
sigma_u<- est.par[2]*w
c(sigma_l, sigma_u)
## [1] 0.3494693 0.8041162
\(t_{0.1}\)
w <- exp(qnorm(0.975)*se_t0.1/t_0.1)
tp_l<- t_0.1/w
tp_u<- t_0.1*w
c(tp_l, tp_u)
## [1] 10018.78 16622.84
\(F(10000)\)
w <- exp(qnorm(0.975)*se_F10000/(F_10000*(1-F_10000)))
F10000_l<- F_10000/(F_10000 + (1-F_10000)*w)
F10000_u<- F_10000/(F_10000 + (1-F_10000)/w)
c(F10000_l, F10000_u)
## [1] 0.01050253 0.13419942
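The endpoints above come from a logit-transformed Wald interval, which guarantees the interval stays inside \((0, 1)\):
\[\left[\frac{\hat{F}}{\hat{F} + (1-\hat{F})\,w},\ \frac{\hat{F}}{\hat{F} + (1-\hat{F})/w}\right], \qquad w = \exp\left(\frac{z_{0.975}\,\hat{se}_{\hat{F}}}{\hat{F}(1-\hat{F})}\right)\]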
estimates | \(\mu\) | \(\sigma\) | \(t_{0.1}\) | \(F(10000)\) |
---|---|---|---|---|
ML estimate | 10.145 | 0.530 | 12905.06 | 0.0389797 |
Standard error of ML estimate | 0.1441874 | 0.1126943 | 1666.879 | 0.02562305 |
95% confidence interval | [9.862, 10.427] | [0.349, 0.804] | [10018.78, 16622.84] | [0.011, 0.134] |