test
Stochastic mortality model selection
Packages
library(StMoMo)
library(demography)
library(MLmetrics)
library(grpreg)
library(gtsummary)
library(tidyverse)Rewrite Hmd.mx and download data
TWNdata = hmd.mx(
country = "TWN",
username = "f110130106@nkust.edu.tw",
password = "zxc1000021",
label = "TAIWAN"
)畫出歷年死亡率在每個歲數之下的變化
par(mai = c(1,1,1,1))
plot(TWNdata, series = "total",ages = 0:89)
legend(95,-1,xpd = TRUE,
box.lwd = 1,
legend = unique(TWNdata$year),
col = rainbow(length(TWNdata$year)*1.25), ncol = 2, pch = 15,
title ="Year", cex=0.4,)par(mai = c(1,1,1,1))
plot(TWNdata, series = "total", plot.type = "time", ages = 0:89)
legend(2025,0,xpd = TRUE,
box.lwd = 1,
legend = unique(TWNdata$year),
col = rainbow(length(TWNdata$year)*1.25), ncol = 2, pch = 19,
title = "Year", cex = 0.4,)Define model
## Define Lee-Carter model using predefined function
LC <- lc(link = "log-Gaussian")
## Define CBD model using predefined function
CBD <- cbd(link = "log-Gaussian")
## Define RH model using predefined functions
RH <- rh(link = "log-Gaussian", cohortAgeFun = "1")
##Define APC model
APC <- apc(link = "log-Gaussian")
##Define M6 model
M6 <- m6(link = "log-Gaussian")
##Define M7 model
M7 <- m7(link = "log-Gaussian")
##Define M8 model
M8 <- m8(link = "log-Gaussian", xc = 100)
##define PLAT model
PLAT <- plat(link = "log-Gaussian")
sPLAT <- plat(link = "log-Gaussian", simplified = TRUE)Make the data to StMoMo class
TWNStMoMo <- StMoMoData(TWNdata, series = "total")
TWNStMoMo1 <- TWNStMoMoModel fitting
ages.fit <- 0:100 ##設定想要建模的年齡範圍
years.fit <- 1970:2009
#LC
LCfit <- fit(LC,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
par(mar = c(2,2,2,2))
LCfit$model$textFormula
#APC
APCfit <-
fit(APC,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
APCfit$model$textFormula
#CBD
CBDfit <-
fit(CBD,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
CBDfit$model$textFormula
#M6
M6fit <-
fit(M6,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
M6fit$model$textFormula
#M7
M7fit <- fit(M7,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
M7fit$model$textFormula
#M8
M8fit <- fit(M8,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
M8fit$model$textFormula
#PLAT
PLATfit <-
fit(PLAT,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
PLATfit$model$textFormula
#sPLAT
sPLATfit <- fit(sPLAT,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit
)
sPLATfit$model$textFormula
## Fit RH model
RHfit <-
fit(
RH,
data = TWNStMoMo1,
ages.fit = ages.fit,
years.fit = years.fit,
start.ax = LCfit$ax,
start.bx = LCfit$bx,
start.kt = LCfit$kt
)
RHfit$model$textFormulaPlot parameters
LCfit$model$textFormula[1] "log m[x,t] = a[x] + b1[x] k1[t]"
par(mar = c(2, 2, 2, 2))
plot(LCfit)RHfit$model$textFormula[1] "log m[x,t] = a[x] + b1[x] k1[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(RHfit)APCfit$model$textFormula[1] "log m[x,t] = a[x] + k1[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(APCfit)CBDfit$model$textFormula[1] "log m[x,t] = k1[t] + f2[x] k2[t]"
par(mar = c(2, 2, 2, 2))
plot(CBDfit)M6fit$model$textFormula[1] "log m[x,t] = k1[t] + f2[x] k2[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(M6fit)M7fit$model$textFormula[1] "log m[x,t] = k1[t] + f2[x] k2[t] + f3[x] k3[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(M7fit)M8fit$model$textFormula[1] "log m[x,t] = k1[t] + f2[x] k2[t] + f0[x] g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(M8fit)PLATfit$model$textFormula[1] "log m[x,t] = a[x] + k1[t] + f2[x] k2[t] + f3[x] k3[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(PLATfit)sPLATfit$model$textFormula[1] "log m[x,t] = a[x] + k1[t] + f2[x] k2[t] + g[t-x]"
par(mar = c(2, 2, 2, 2))
plot(sPLATfit)In sample evaluate
Compute deviance residuals
LCres <- residuals(LCfit)
CBDres <- residuals(CBDfit)
RHres <- residuals(RHfit)
APCres <- residuals(APCfit)
M6res <- residuals(M6fit)
M7res <- residuals(M7fit)
M8res <- residuals(M8fit)
PLATres <- residuals(PLATfit)
sPLATres <- residuals(sPLATfit)Plot residuals
## Colour map of residuals for LC
plot(LCres, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for CBD
plot(CBDres, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for APC
plot(APCres, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for RH
plot(RHres, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for M6
plot(M6res, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for M7
plot(M7res, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for M8
plot(M8res, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for PLAT
plot(PLATres, type = "colourmap", reslim = c(-3.5, 3.5))## Colour map of residuals for PLAT
plot(sPLATres, type = "colourmap", reslim = c(-3.5, 3.5))Plot scatter plot of residuals
## Scatter plot of residuals for LC
plot(LCres, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for CBD
plot(CBDres, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for APC
plot(APCres, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for RH
plot(RHres, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for M6
plot(M6res, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for M7
plot(M7res, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for M8
plot(M8res, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for PLAT
plot(PLATres, type = "scatter", reslim = c(-3.5, 3.5))## Scatter plot of residuals for sPLAT
plot(sPLATres, type = "scatter", reslim = c(-3.5, 3.5))Compare the AIC and BIC
AIC BIC logLik
LC -7806.343 -6293.383 4143.171
RH -9173.750 -6784.534 4965.875
APC -8212.808 -6460.296 4384.404
CBD 6592.309 7096.629 -3216.155
M6 4165.700 5539.972 -1864.850
M7 3139.535 4759.663 -1312.768
M8 3815.720 5189.992 -1689.860
PLAT -8754.379 -6516.459 4732.189
sPLAT -8428.907 -6436.843 4530.453
fitted值
LCfitted <- fitted(LCfit, type = "rates")
RHfitted <- fitted(RHfit, type = "rates")
APCfitted <- fitted(APCfit, type = "rates")
CBDfitted <- fitted(CBDfit, type = "rates")
M6fitted <- fitted(M6fit, type = "rates")
M7fitted <- fitted(M7fit, type = "rates")
M8fitted <- fitted(M8fit, type = "rates")
PLATfitted <- fitted(PLATfit, type = "rates")
sPLATfitted <- fitted(sPLATfit, type = "rates")Table of information criteria
deathactualrate <- LCfit$Dxt/LCfit$Ext
model_mape <- matrix(c(MAPE(LCfitted,deathactualrate),
MAPE(RHfitted,deathactualrate),
MAPE(APCfitted,deathactualrate),
MAPE(CBDfitted,deathactualrate),
MAPE(M6fitted,deathactualrate),
MAPE(M7fitted,deathactualrate),
MAPE(M8fitted,deathactualrate),
MAPE(PLATfitted,deathactualrate),
MAPE(sPLATfitted,deathactualrate),
MSE(LCfitted,deathactualrate),
MSE(RHfitted,deathactualrate),
MSE(APCfitted,deathactualrate),
MSE(CBDfitted,deathactualrate),
MSE(M6fitted,deathactualrate),
MSE(M7fitted,deathactualrate),
MSE(M8fitted,deathactualrate),
MSE(PLATfitted,deathactualrate),
MSE(sPLATfitted,deathactualrate) ),
ncol = 2,
nrow = 9,
byrow = FALSE)
colnames(model_mape) <- c("MAPE","MSE")
rownames(model_mape) <- c("LC","RH","APC","CBD","M6","M7","M8","PLAT","sPLAT")
model_mape.table <- as.table(model_mape)
model_mape.table MAPE MSE
LC 5.972308e-02 2.152122e-04
RH 4.864498e-02 1.732037e-04
APC 5.538630e-02 1.507062e-04
CBD 3.129871e-01 1.136675e-03
M6 2.013970e-01 8.629253e-04
M7 1.751829e-01 1.164931e-04
M8 1.759729e-01 5.714967e-04
PLAT 5.058188e-02 8.443537e-05
sPLAT 5.379771e-02 1.123537e-04
Out of sample evaluate
Information criteria of cross validation
ages.train <- 0:100
years.train <- 1970:2009
LCcv10 <-
cv.StMoMo(
LC,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
RHcv10 <-
cv.StMoMo(
RH,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
APCcv10 <-
cv.StMoMo(
APC,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
CBDcv10 <-
cv.StMoMo(
CBD,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
M6cv10 <-
cv.StMoMo(
M6,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
M7cv10 <-
cv.StMoMo(
M7,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
M8cv10 <-
cv.StMoMo(
M8,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
PLATcv10 <-
cv.StMoMo(
PLAT,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)
sPLATcv10 <-
cv.StMoMo(
sPLAT,
h = 10,
data = TWNStMoMo1,
type = "logrates",
years.train = years.fit,
ages.train = ages.fit,
all.horizon = TRUE
)Information criteria of cross validation table
cvmse <- matrix(c(LCcv10$cv.mse.h,
RHcv10$cv.mse.h,
APCcv10$cv.mse.h,
CBDcv10$cv.mse.h,
M6cv10$cv.mse.h,
M7cv10$cv.mse.h,
M8cv10$cv.mse.h,
PLATcv10$cv.mse.h,
sPLATcv10$cv.mse.h),
ncol = 10,
nrow = 9,
byrow = TRUE)
colnames(cvmse) <- c(1:10)
rownames(cvmse) <- c("LC","RH","APC","CBD","M6","M7","M8","PLAT","sPLAT")
cvmse.table <- as.table(cvmse)
cvmse.table 1 2 3 4 5
LC 0.011123624 0.011851159 0.012992406 0.013255625 0.013605959
RH 0.128714112 0.493680803 1.101004216 1.980134524 3.127757249
APC 0.009508617 0.010497173 0.011395932 0.011374838 0.011483288
CBD 0.296380211 0.293006984 0.290968865 0.287434990 0.284024346
M6 0.197950546 0.219787135 0.238224263 0.249315953 0.257285232
M7 0.153575272 0.172718141 0.189941042 0.202324456 0.213788587
M8 0.190602043 0.217060921 0.237481099 0.248807606 0.256184116
PLAT 0.009132082 0.010520860 0.011855074 0.012255496 0.012846513
sPLAT 0.009398635 0.010563610 0.011684493 0.011847064 0.012097647
6 7 8 9 10
LC 0.013170581 0.013526251 0.014297712 0.014551341 0.014966483
RH 4.553629097 6.284071303 8.326311881 10.649279422 13.311662233
APC 0.011239670 0.011022747 0.011281695 0.010992396 0.010867354
CBD 0.280949572 0.277784697 0.276070951 0.274555883 0.272083222
M6 0.259874182 0.258306935 0.253481132 0.243679425 0.227840647
M7 0.221253646 0.225227369 0.228064441 0.226999125 0.220516768
M8 0.259665567 0.258790222 0.254078497 0.244649451 0.228435074
PLAT 0.012785140 0.012837726 0.013320293 0.013033817 0.013166320
sPLAT 0.011936181 0.011848655 0.012190469 0.012012388 0.012007902
forecasting for next ten years
## Forecast all models
LCfor <- forecast(LCfit,
h = 10,
kt.method = "mrwd")
par(mar = c(2,2,2,2))
plot(LCfor, only.kt = TRUE)CBDfor <- forecast(CBDfit,
h = 10,
kt.method = "mrwd")
par(mar = c(2,2,2,2))
plot(CBDfor)auto.arima(APCfit$gc,stepwise = FALSE,approximation = FALSE)Series: APCfit$gc
ARIMA(0,2,5)
Coefficients:
ma1 ma2 ma3 ma4 ma5
-1.2611 0.6354 -0.2184 -0.6357 0.6323
s.e. 0.0879 0.1351 0.1264 0.1343 0.1048
sigma^2 = 0.001334: log likelihood = 260.45
AIC=-508.89 AICc=-508.25 BIC=-491.33
APCfor <- forecast(APCfit,
h = 10,
kt.method = "mrwd",
gc.order = c(0,2,5)
)
par(mar = c(2,2,2,2))
plot(APCfor)auto.arima(RHfit$gc,stepwise = FALSE,approximation = FALSE)Series: RHfit$gc
ARIMA(1,2,1)
Coefficients:
ar1 ma1
0.2351 -0.8964
s.e. 0.1017 0.0463
sigma^2 = 0.001309: log likelihood = 262.67
AIC=-519.33 AICc=-519.15 BIC=-510.55
RHfor <- forecast(RHfit,
h = 10,
kt.method = "mrwd",
gc.order = c(2,1,1) #gc.order是由上面iarima取得最小AIC的參數
)
par(mar = c(2,2,2,2))
plot(RHfor)auto.arima(M6fit$gc,stepwise = FALSE,approximation = FALSE)Series: M6fit$gc
ARIMA(1,2,2)
Coefficients:
ar1 ma1 ma2
0.8882 -1.1798 0.7774
s.e. 0.1397 0.0819 0.1170
sigma^2 = 0.004513: log likelihood = 176.93
AIC=-345.85 AICc=-345.55 BIC=-334.14
M6for <- forecast(M6fit,
h = 10,
kt.method = "mrwd",
gc.order = c(1,2,2))
par(mar = c(2,2,2,2))
plot(M6for)auto.arima(M7fit$gc,stepwise = FALSE,approximation = FALSE)Series: M7fit$gc
ARIMA(0,0,4) with zero mean
Coefficients:
ma1 ma2 ma3 ma4
2.5522 3.3691 2.4645 0.9345
s.e. 0.0652 0.1500 0.1625 0.0834
sigma^2 = 0.02199: log likelihood = 64.02
AIC=-118.05 AICc=-117.6 BIC=-103.34
M7for <- forecast(M7fit,
h = 10,
kt.method = "mrwd",
gc.order = c(0, 0, 4)
)
par(mar = c(2,2,2,2))
plot(M7for)auto.arima(M8fit$gc,stepwise = FALSE,approximation = FALSE)Series: M8fit$gc
ARIMA(2,1,3)
Coefficients:
ar1 ar2 ma1 ma2 ma3
0.0283 0.9434 -1.8457 1.7241 -0.5896
s.e. 0.0629 0.0627 0.0799 0.1309 0.1008
sigma^2 = 0.00391: log likelihood = 184.06
AIC=-356.11 AICc=-355.47 BIC=-338.5
M8for <- forecast(M8fit,
h = 10,
kt.method = "iarima",
gc.order = c(2, 1, 3))
par(mar = c(2,2,2,2))
plot(M8for)auto.arima(PLATfit$gc,stepwise = FALSE,approximation = FALSE)Series: PLATfit$gc
ARIMA(3,0,2) with non-zero mean
Coefficients:
ar1 ar2 ar3 ma1 ma2 mean
0.9596 -0.5963 0.5752 -0.1849 0.9455 -0.0179
s.e. 0.1207 0.1429 0.1182 0.0767 0.0402 0.0674
sigma^2 = 0.00108: log likelihood = 280.03
AIC=-546.07 AICc=-545.22 BIC=-525.47
PLATfor <- forecast(PLATfit,
h = 10,
gc.order = c(3, 0, 2))
plot(PLATfor)auto.arima(sPLATfit$gc,stepwise = FALSE,approximation = FALSE)Series: sPLATfit$gc
ARIMA(1,0,4) with zero mean
Coefficients:
ar1 ma1 ma2 ma3 ma4
0.9373 -0.1245 0.3112 0.1378 -0.2370
s.e. 0.0349 0.1038 0.1218 0.1031 0.1341
sigma^2 = 0.001099: log likelihood = 279.33
AIC=-546.67 AICc=-546.04 BIC=-529.02
sPLATfor <- forecast(PLATfit,
h = 10,
gc.order = c(1, 0, 4))
par(mar = c(2,2,2,2))
plot(sPLATfor)