#### Part 2. Locomotive Control Reliability Data
(type I censored)
\(x_i\) : failure times of i-th item (miles)
\(c_i\) : censoring times (=136,000 miles)
\(y_i = min(x_i, c_i)\)
\(\delta_i = I(x_i <c_i)\), \(i = 1, ..., n\)
Objective: 중도절단된(censored) 기관차 부품의 수명자료 (reliabilty data)를 이용하여 부 품의 수명분포를 추정하여 부품의 신뢰도(reliability)를 예측.
rm(list=ls())
options(scipen=100)
options(crayon.enabled = FALSE)
setwd("D:\\0_grad\\data")
library(tidyverse)
library(readxl)
library(optimx)
library(knitr)
library(kableExtra)
loco <- read_xls("Locomotive-FDB-datasets.xls", sheet = 1, col_names = c("y", "delta"), skip=1)
attach(loco)
#y
loco %>% filter(delta == 1) %>%
ggplot(aes(y)) + geom_histogram(breaks = seq(20000,140000,10000), color="white") +
theme_bw() +
labs("title" = "Histogram of Complete Data (y)")
#lny
loco %>% filter(delta == 1) %>%
ggplot(aes(log(y))) + geom_histogram(breaks = seq(10, 12, .2), color="white") +
theme_bw() +
labs("title" = "Histogram of Complete Data (lny)", x="lny")
loco <- loco %>% filter(delta == 1) %>%
mutate(m= sum(delta), rank=rank(y), pr = rank/(m+1),
gompertz = log(-log(1-pr)),
normal = qnorm(pr),
gumbel = -log(-log(pr)))
#2-parameter Weibull
qq1 <- ggplot(loco, aes(gompertz, log(y))) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "2-Parameter Weibull Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(log(y) ~ gompertz, loco))$r.squared,4)*100, "%", sep = "")) +
theme_bw()
#3-parameter Weibull
a <- seq(-50000, 22400, 100)
r.sq <- NULL
for (i in 1:length(a)) {
r.sq[i] <- summary(lm(log(loco$y - a[i])~gompertz, loco))$r.squared
}
a <- a[which.max(r.sq)]
qq2 <- ggplot(loco, aes(gompertz, log(y-a))) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "3-Parameter Weibull Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(log(y-a) ~ gompertz, loco))$r.squared,4)*100, "%", sep = ""),
y=paste0("ln(y-a), a=", a)) +
theme_bw()
#log-nomal
qq3 <- ggplot(loco, aes(normal, log(y))) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "Log-normal Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(log(y) ~ normal, loco))$r.squared,4)*100, "%", sep = "")) +
theme_bw()
#Gompertz
qq4 <- ggplot(loco, aes(gompertz, y)) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "Gompertz Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(y ~ gompertz, loco))$r.squared,4)*100, "%", sep = "")) +
theme_bw()
#Gumbel
qq5 <- ggplot(loco, aes(gumbel, y)) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "Gumbel Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(y ~ gumbel, loco))$r.squared,4)*100, "%", sep = "")) +
theme_bw()
#normal
qq6 <- ggplot(loco, aes(normal, y)) +
geom_point(size = 2) + geom_smooth(method = "lm", se = F) +
labs(title = "Normal Q-Q Plot",
subtitle = paste("R.sq = ", round(summary(lm(y ~ normal, loco))$r.squared,4)*100, "%", sep = "")) +
theme_bw()
library(gridExtra)
grid.arrange(qq1, qq2, qq3, qq4, qq5, qq6, nrow=2)
c) 다음 각 모형에 대하여 해당 모수의 MLE 및 AIC 값을 구하시오.
#2-parameter Weibull
weibull.lm.fit.2p <- lm(log(y) ~ gompertz, loco)
weibull.2p.parameter <- c(0, weibull.lm.fit.2p$coefficients[1], weibull.lm.fit.2p$coefficients[2])
#log-likihood : sum( delta * log( pdf ) ) + sum( (1-delta) * log( 1-CDF ) )
weibull.2p.mle.fn <- function(variable){
mu <- variable[1]
sigma <- variable[2]
u <- ( log(y) - mu ) / sigma
F <- 1 - exp( - exp(u) )
f <- exp( u - exp(u))/(sigma*y)
- sum(delta * log(f) + (1-delta)*log(1-F))
}
weibull.optim <- optimx(par=c(weibull.2p.parameter[2], weibull.2p.parameter[3]), weibull.2p.mle.fn,
control=list(all.methods=TRUE,trace=0))
weibull.2p <- c(0, optim(par=c(weibull.2p.parameter[2], weibull.2p.parameter[3]), weibull.2p.mle.fn,
method="BFGS",hessian=T)$par,
2*weibull.optim$value[1],
2*2)
#3-parameter Weibull
weibull.lm.fit.3p <- lm(log(y-a) ~ gompertz, loco)
weibull.3p.parameter <- c(a, weibull.lm.fit.3p$coefficients)
weibull.3p.mle.fn <- function(variable){
a <- variable[1]
mu <- variable[2]
sigma <- variable[3]
u <- ( log(y-a) - mu ) / sigma
F <- 1 - exp( - exp(u) )
f <- exp( u - exp(u))/(sigma*(y-a))
- sum(delta * log(f) + (1-delta)*log(1-F))
}
weibull.optim.3p <- optimx(par=weibull.3p.parameter, weibull.3p.mle.fn,
control=list(all.methods=TRUE))
weibull.3p <- c(optim(par=weibull.3p.parameter, weibull.3p.mle.fn,
method="BFGS",hessian=T)$par,
2*weibull.optim.3p$value[1], 2*3)
#log-normal
lognormal.lm.fit <- lm( log(y) ~ normal, loco )
log.normal.parameter <- c(lognormal.lm.fit$coefficients)
lognormal.mle.fn <- function(variable){
mu <- variable[1]
sigma <- variable[2]
u <- ( log(y) - mu ) / sigma
F <- pnorm(u)
f <- dnorm(u)/(sigma*y)
- sum(delta * log(f) + (1-delta)*log(1-F))
}
log.normal.optim <- optimx(par=log.normal.parameter, lognormal.mle.fn,
control=list(all.methods=TRUE,trace=0))
log.normal <- c(0, optim(par=log.normal.parameter, lognormal.mle.fn,
method="BFGS",hessian=T)$par,
2*log.normal.optim$value[1], 2*2)
#Gompertz
gompertz.lm.fit <- lm( y ~ gompertz, loco)
gompertz.parameter <- c(gompertz.lm.fit$coefficients)
gompertz.mle.fn <- function(variable){
mu <- variable[1]
sigma <- variable[2]
u <- ( y - mu ) / sigma
F <- 1 - exp( - exp(u) )
f <- exp( u - exp(u))/sigma
- sum(delta * log(f) + (1-delta)*log(1-F))
}
gompertz.optim <- optimx(par=gompertz.parameter, gompertz.mle.fn,
control=list(all.methods=TRUE,trace=0))
gompertz <- c(0, optim(par=gompertz.parameter, gompertz.mle.fn,
method="BFGS",hessian=T)$par,
2*gompertz.optim$value[1], 2*2)
#Gumbel
gumbel.lm.fit <- lm(y ~ gumbel, loco)
gumbel.parameter <- c(gumbel.lm.fit$coefficients)
gumbel.mle.fn <- function(variable){
mu <- variable[1]
sigma <- variable[2]
u <- ( y - mu ) / sigma
F <- exp( - exp(-u) )
f <- exp(-u - exp(-u))/sigma
- sum(delta * log(f) + (1-delta)*log(1-F))
}
gumbel.optim <- optimx(par=gumbel.parameter, gumbel.mle.fn,
control=list(all.methods=TRUE,trace=0))
gumbel <- c(0, optim(par=gumbel.parameter, gumbel.mle.fn,
method="BFGS",hessian=T)$par,
2*gumbel.optim$value[1],
2*2)
#Normal
normal.lm.fit <- lm( y ~ normal, loco)
normal.parameter <- c(normal.lm.fit$coefficients)
normal.mle.fn <- function(variable){
mu <- variable[1]
sigma <- variable[2]
u <- ( y - mu ) / sigma
F <- pnorm(u)
f <- dnorm(u)/sigma
- sum(delta * log(f) + (1-delta)*log(1-F))
}
normal.optim <- optimx(par=normal.parameter, normal.mle.fn,
control=list(all.methods=TRUE,trace=0))
normal <- c(0, optim(par=normal.parameter, normal.mle.fn,
method="BFGS",hessian=T)$par,
2*normal.optim$value[1],
2*2)
result.c <- data.frame(rbind(weibull.2p, weibull.3p, log.normal, gompertz, gumbel, normal))
colnames(result.c) <- c("a", "mu", "sigma", "-2l(theta)", "2p")
result.c <- result.c %>% mutate(AIC=result.c[,4]+result.c[,5])
rownames(result.c) <- c("Weibull.2p", "Weibull.3p", "log.normal", "Gompertz", "Gumbel", "Normal")
kable(result.c) %>% kable_styling(full_width = F, position = "left")
a | mu | sigma | -2l(theta) | 2p | AIC | |
---|---|---|---|---|---|---|
Weibull.2p | 0.00 | 12.11942 | 0.4289763 | 985.9389 | 4 | 989.9389 |
Weibull.3p | -33499.97 | 12.25359 | 0.3091112 | 987.8194 | 6 | 993.8194 |
log.normal | 0.00 | 12.02468 | 0.7055005 | 985.3610 | 4 | 989.3610 |
Gompertz | 0.00 | 122327.18661 | 48810.3093730 | 1046.5939 | 4 | 1050.5939 |
Gumbel | 0.00 | 87124.05546 | 59738.9134097 | 1017.7191 | 4 | 1021.7191 |
Normal | 0.00 | 114060.32832 | 66068.2076455 | 1017.5809 | 4 | 1021.5809 |
AIC가 작은 Weibull 2parameter 모형과 log normal 모형 선택
d) 위에서 선택된 가장 적절한 수명분포 2개를 이용하여 기관차 부품의 수명이 15만, 20만, 30만 mile 이상일 확률을 구하시오.
t <- c(150000, 200000, 300000)
model1 <- c(mu = weibull.2p[2], sigma = weibull.2p[3])
model2 <- c(mu = log.normal[2], sigma = log.normal[3])
result.d <- data.frame(t=t, weibull =exp(-exp((log(t)-model1[1])/model1[2])),
log.normal = 1-pnorm((log(t)-model2[1] )/model2[2]))
kable(result.d) %>% kable_styling(full_width = F, position = "left")
t | weibull | log.normal |
---|---|---|
150000 | 0.5348041 | 0.5598795 |
200000 | 0.2941008 | 0.3985483 |
300000 | 0.0428822 | 0.2027535 |