test

Author

leo

Stochastic mortality model selection

Packages

library(StMoMo)
library(demography)
library(MLmetrics)
library(grpreg)
library(gtsummary)
library(tidyverse)

Rewrite Hmd.mx and download data

TWNdata = hmd.mx(
  country = "TWN",
  username = "f110130106@nkust.edu.tw",
  password = "zxc1000021",
  label = "TAIWAN"
)

畫出歷年死亡率在每個歲數之下的變化

par(mai = c(1,1,1,1))
plot(TWNdata, series = "total",ages = 0:89)
legend(95,-1,xpd = TRUE,
       box.lwd = 1,
       legend = unique(TWNdata$year),
       col = rainbow(length(TWNdata$year)*1.25), ncol = 2, pch = 15, 
       title ="Year", cex=0.4,)

par(mai = c(1,1,1,1))
plot(TWNdata, series = "total", plot.type = "time", ages = 0:89)
legend(2025,0,xpd = TRUE,
       box.lwd = 1,
       legend = unique(TWNdata$year),
       col = rainbow(length(TWNdata$year)*1.25), ncol = 2, pch = 19, 
       title = "Year", cex = 0.4,)

Define model

## Define Lee-Carter model using predefined function
LC <- lc(link = "log-Gaussian")



## Define CBD model using predefined function
CBD <- cbd(link = "log-Gaussian")


## Define RH model using predefined functions
RH <- rh(link = "log-Gaussian",  cohortAgeFun = "1")




##Define APC model
APC <- apc(link = "log-Gaussian")


##Define M6 model
M6 <- m6(link = "log-Gaussian")


##Define M7 model
M7 <- m7(link = "log-Gaussian")

##Define M8 model
M8 <- m8(link = "log-Gaussian", xc = 100)


##define PLAT model

PLAT <- plat(link = "log-Gaussian")

sPLAT <- plat(link = "log-Gaussian", simplified = TRUE)

Make the data to StMoMo class

TWNStMoMo <- StMoMoData(TWNdata, series = "total")
TWNStMoMo1 <- TWNStMoMo

Model fitting

ages.fit <- 0:100 ##設定想要建模的年齡範圍
years.fit <- 1970:2009

#LC
LCfit <- fit(LC,
             data = TWNStMoMo1,
             ages.fit = ages.fit,
             years.fit = years.fit
             )

par(mar = c(2,2,2,2))

LCfit$model$textFormula





#APC
APCfit <-
  fit(APC,
      data = TWNStMoMo1,
      ages.fit = ages.fit,
      years.fit = years.fit
      )

APCfit$model$textFormula




#CBD
CBDfit <-
  fit(CBD,
      data = TWNStMoMo1,
      ages.fit = ages.fit,
      years.fit = years.fit
      )
CBDfit$model$textFormula


#M6
M6fit <-
  fit(M6,
      data = TWNStMoMo1,
      ages.fit = ages.fit,
      years.fit = years.fit
      )
M6fit$model$textFormula



#M7
M7fit <- fit(M7,
             data = TWNStMoMo1,
             ages.fit = ages.fit,
             years.fit = years.fit
             )
M7fit$model$textFormula






#M8
M8fit <- fit(M8,
             data = TWNStMoMo1,
             ages.fit = ages.fit,
             years.fit = years.fit
             )
M8fit$model$textFormula




#PLAT
PLATfit <-
  fit(PLAT,
      data = TWNStMoMo1,
      ages.fit = ages.fit,
      years.fit = years.fit
      )
PLATfit$model$textFormula


#sPLAT
sPLATfit <- fit(sPLAT, 
                data = TWNStMoMo1,
                ages.fit = ages.fit,
                years.fit = years.fit
                )
sPLATfit$model$textFormula





## Fit RH model 
RHfit <-
  fit(
    RH,
    data = TWNStMoMo1,
    ages.fit = ages.fit,
    years.fit = years.fit,
    start.ax = LCfit$ax,
    start.bx = LCfit$bx,
    start.kt = LCfit$kt
    )
RHfit$model$textFormula

Plot parameters

LCfit$model$textFormula

[1] "log m[x,t] = a[x] + b1[x] k1[t]"

par(mar = c(2, 2, 2, 2))
plot(LCfit)

RHfit$model$textFormula

[1] "log m[x,t] = a[x] + b1[x] k1[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(RHfit)

APCfit$model$textFormula

[1] "log m[x,t] = a[x] + k1[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(APCfit)

CBDfit$model$textFormula

[1] "log m[x,t] = k1[t] + f2[x] k2[t]"

par(mar = c(2, 2, 2, 2))
plot(CBDfit)

M6fit$model$textFormula

[1] "log m[x,t] = k1[t] + f2[x] k2[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(M6fit)

M7fit$model$textFormula

[1] "log m[x,t] = k1[t] + f2[x] k2[t] + f3[x] k3[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(M7fit)

M8fit$model$textFormula

[1] "log m[x,t] = k1[t] + f2[x] k2[t] + f0[x] g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(M8fit)

PLATfit$model$textFormula

[1] "log m[x,t] = a[x] + k1[t] + f2[x] k2[t] + f3[x] k3[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(PLATfit)

sPLATfit$model$textFormula

[1] "log m[x,t] = a[x] + k1[t] + f2[x] k2[t] + g[t-x]"

par(mar = c(2, 2, 2, 2))
plot(sPLATfit)

In sample evaluate

Compute deviance residuals

LCres <- residuals(LCfit)
CBDres <- residuals(CBDfit)
RHres <- residuals(RHfit)
APCres <- residuals(APCfit)
M6res <- residuals(M6fit)
M7res <- residuals(M7fit)
M8res <- residuals(M8fit)
PLATres <- residuals(PLATfit)
sPLATres <- residuals(sPLATfit)

Plot residuals

## Colour map of residuals for LC 
plot(LCres, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for CBD 
plot(CBDres, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for APC 
plot(APCres, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for RH 
plot(RHres, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for M6
plot(M6res, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for M7 
plot(M7res, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for M8 
plot(M8res, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for PLAT 
plot(PLATres, type = "colourmap", reslim = c(-3.5, 3.5))

## Colour map of residuals for PLAT 
plot(sPLATres, type = "colourmap", reslim = c(-3.5, 3.5))

Plot scatter plot of residuals

## Scatter plot of residuals for LC 
plot(LCres, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for CBD 
plot(CBDres, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for APC
plot(APCres, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for RH
plot(RHres, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for M6
plot(M6res, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for M7
plot(M7res, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for M8
plot(M8res, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for PLAT
plot(PLATres, type = "scatter", reslim = c(-3.5, 3.5))

## Scatter plot of residuals for sPLAT
plot(sPLATres, type = "scatter", reslim = c(-3.5, 3.5))

Compare the AIC and BIC

            AIC       BIC    logLik
LC    -7806.343 -6293.383  4143.171
RH    -9173.750 -6784.534  4965.875
APC   -8212.808 -6460.296  4384.404
CBD    6592.309  7096.629 -3216.155
M6     4165.700  5539.972 -1864.850
M7     3139.535  4759.663 -1312.768
M8     3815.720  5189.992 -1689.860
PLAT  -8754.379 -6516.459  4732.189
sPLAT -8428.907 -6436.843  4530.453

fitted值

LCfitted <- fitted(LCfit, type = "rates")
RHfitted <- fitted(RHfit, type = "rates")
APCfitted <- fitted(APCfit, type = "rates")
CBDfitted <- fitted(CBDfit, type = "rates")
M6fitted <- fitted(M6fit, type = "rates")
M7fitted <- fitted(M7fit, type = "rates")
M8fitted <- fitted(M8fit, type = "rates")
PLATfitted <- fitted(PLATfit, type = "rates")
sPLATfitted <- fitted(sPLATfit, type = "rates")

Table of information criteria

deathactualrate <- LCfit$Dxt/LCfit$Ext
model_mape <- matrix(c(MAPE(LCfitted,deathactualrate),
                     MAPE(RHfitted,deathactualrate),
                     MAPE(APCfitted,deathactualrate),
                     MAPE(CBDfitted,deathactualrate),
                     MAPE(M6fitted,deathactualrate),
                     MAPE(M7fitted,deathactualrate),
                     MAPE(M8fitted,deathactualrate),
                     MAPE(PLATfitted,deathactualrate),
                     MAPE(sPLATfitted,deathactualrate),
                     MSE(LCfitted,deathactualrate),
                     MSE(RHfitted,deathactualrate),
                     MSE(APCfitted,deathactualrate),
                     MSE(CBDfitted,deathactualrate),
                     MSE(M6fitted,deathactualrate),
                     MSE(M7fitted,deathactualrate),
                     MSE(M8fitted,deathactualrate),
                     MSE(PLATfitted,deathactualrate),
                     MSE(sPLATfitted,deathactualrate)                     ),
                   ncol = 2,
                   nrow = 9,
                   byrow = FALSE)


colnames(model_mape) <- c("MAPE","MSE")
rownames(model_mape) <- c("LC","RH","APC","CBD","M6","M7","M8","PLAT","sPLAT")
model_mape.table <- as.table(model_mape)
model_mape.table

              MAPE          MSE
LC    5.972308e-02 2.152122e-04
RH    4.864498e-02 1.732037e-04
APC   5.538630e-02 1.507062e-04
CBD   3.129871e-01 1.136675e-03
M6    2.013970e-01 8.629253e-04
M7    1.751829e-01 1.164931e-04
M8    1.759729e-01 5.714967e-04
PLAT  5.058188e-02 8.443537e-05
sPLAT 5.379771e-02 1.123537e-04

Out of sample evaluate

Information criteria of cross validation

ages.train <- 0:100
years.train <- 1970:2009
LCcv10 <-
  cv.StMoMo(
    LC,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
    
  ) 


RHcv10 <-
  cv.StMoMo(
    RH,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



APCcv10 <-
  cv.StMoMo(
    APC,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



CBDcv10 <-
  cv.StMoMo(
    CBD,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



M6cv10 <-
  cv.StMoMo(
    M6,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



M7cv10 <-
  cv.StMoMo(
    M7,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



M8cv10 <-
  cv.StMoMo(
    M8,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )



PLATcv10 <-
  cv.StMoMo(
    PLAT,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )

sPLATcv10 <-
  cv.StMoMo(
    sPLAT,
    h = 10,
    data = TWNStMoMo1,
    type = "logrates",
    years.train = years.fit,
    ages.train = ages.fit,
    all.horizon = TRUE
  )

Information criteria of cross validation table

cvmse <- matrix(c(LCcv10$cv.mse.h, 
                 RHcv10$cv.mse.h,
                 APCcv10$cv.mse.h,
                 CBDcv10$cv.mse.h,
                 M6cv10$cv.mse.h,
                 M7cv10$cv.mse.h,
                 M8cv10$cv.mse.h,
                 PLATcv10$cv.mse.h,
                 sPLATcv10$cv.mse.h),
               ncol = 10,
               nrow = 9,
               byrow = TRUE)
colnames(cvmse) <- c(1:10)
rownames(cvmse) <- c("LC","RH","APC","CBD","M6","M7","M8","PLAT","sPLAT")
cvmse.table <- as.table(cvmse)
cvmse.table

                 1            2            3            4            5
LC     0.011123624  0.011851159  0.012992406  0.013255625  0.013605959
RH     0.128714112  0.493680803  1.101004216  1.980134524  3.127757249
APC    0.009508617  0.010497173  0.011395932  0.011374838  0.011483288
CBD    0.296380211  0.293006984  0.290968865  0.287434990  0.284024346
M6     0.197950546  0.219787135  0.238224263  0.249315953  0.257285232
M7     0.153575272  0.172718141  0.189941042  0.202324456  0.213788587
M8     0.190602043  0.217060921  0.237481099  0.248807606  0.256184116
PLAT   0.009132082  0.010520860  0.011855074  0.012255496  0.012846513
sPLAT  0.009398635  0.010563610  0.011684493  0.011847064  0.012097647
                 6            7            8            9           10
LC     0.013170581  0.013526251  0.014297712  0.014551341  0.014966483
RH     4.553629097  6.284071303  8.326311881 10.649279422 13.311662233
APC    0.011239670  0.011022747  0.011281695  0.010992396  0.010867354
CBD    0.280949572  0.277784697  0.276070951  0.274555883  0.272083222
M6     0.259874182  0.258306935  0.253481132  0.243679425  0.227840647
M7     0.221253646  0.225227369  0.228064441  0.226999125  0.220516768
M8     0.259665567  0.258790222  0.254078497  0.244649451  0.228435074
PLAT   0.012785140  0.012837726  0.013320293  0.013033817  0.013166320
sPLAT  0.011936181  0.011848655  0.012190469  0.012012388  0.012007902

forecasting for next ten years

## Forecast all models

LCfor <- forecast(LCfit,
                  h = 10, 
                  kt.method = "mrwd")
par(mar = c(2,2,2,2))
plot(LCfor, only.kt = TRUE)

CBDfor <- forecast(CBDfit, 
                   h = 10,
                   kt.method = "mrwd")
par(mar = c(2,2,2,2))
plot(CBDfor)

auto.arima(APCfit$gc,stepwise = FALSE,approximation = FALSE)

Series: APCfit$gc 
ARIMA(0,2,5) 

Coefficients:
          ma1     ma2      ma3      ma4     ma5
      -1.2611  0.6354  -0.2184  -0.6357  0.6323
s.e.   0.0879  0.1351   0.1264   0.1343  0.1048

sigma^2 = 0.001334:  log likelihood = 260.45
AIC=-508.89   AICc=-508.25   BIC=-491.33

APCfor <- forecast(APCfit,
                   h = 10,
                   kt.method = "mrwd",
                   gc.order = c(0,2,5) 
)
par(mar = c(2,2,2,2))
plot(APCfor)

auto.arima(RHfit$gc,stepwise = FALSE,approximation = FALSE)

Series: RHfit$gc 
ARIMA(1,2,1) 

Coefficients:
         ar1      ma1
      0.2351  -0.8964
s.e.  0.1017   0.0463

sigma^2 = 0.001309:  log likelihood = 262.67
AIC=-519.33   AICc=-519.15   BIC=-510.55

RHfor <- forecast(RHfit,
                  h = 10,
                  kt.method = "mrwd",
                  gc.order = c(2,1,1) #gc.order是由上面iarima取得最小AIC的參數
)
par(mar = c(2,2,2,2))
plot(RHfor)

auto.arima(M6fit$gc,stepwise = FALSE,approximation = FALSE)

Series: M6fit$gc 
ARIMA(1,2,2) 

Coefficients:
         ar1      ma1     ma2
      0.8882  -1.1798  0.7774
s.e.  0.1397   0.0819  0.1170

sigma^2 = 0.004513:  log likelihood = 176.93
AIC=-345.85   AICc=-345.55   BIC=-334.14

M6for <- forecast(M6fit,
                  h = 10,
                  kt.method = "mrwd",
                  gc.order = c(1,2,2))
par(mar = c(2,2,2,2))
plot(M6for)

auto.arima(M7fit$gc,stepwise = FALSE,approximation = FALSE)

Series: M7fit$gc 
ARIMA(0,0,4) with zero mean 

Coefficients:
         ma1     ma2     ma3     ma4
      2.5522  3.3691  2.4645  0.9345
s.e.  0.0652  0.1500  0.1625  0.0834

sigma^2 = 0.02199:  log likelihood = 64.02
AIC=-118.05   AICc=-117.6   BIC=-103.34

M7for <- forecast(M7fit,
                  h = 10,
                  kt.method = "mrwd",
                  gc.order = c(0, 0, 4)
)
par(mar = c(2,2,2,2))
plot(M7for)

auto.arima(M8fit$gc,stepwise = FALSE,approximation = FALSE)

Series: M8fit$gc 
ARIMA(2,1,3) 

Coefficients:
         ar1     ar2      ma1     ma2      ma3
      0.0283  0.9434  -1.8457  1.7241  -0.5896
s.e.  0.0629  0.0627   0.0799  0.1309   0.1008

sigma^2 = 0.00391:  log likelihood = 184.06
AIC=-356.11   AICc=-355.47   BIC=-338.5

M8for <- forecast(M8fit,
                  h = 10,
                  kt.method = "iarima",
                  gc.order = c(2, 1, 3))
par(mar = c(2,2,2,2))
plot(M8for)

auto.arima(PLATfit$gc,stepwise = FALSE,approximation = FALSE)

Series: PLATfit$gc 
ARIMA(3,0,2) with non-zero mean 

Coefficients:
         ar1      ar2     ar3      ma1     ma2     mean
      0.9596  -0.5963  0.5752  -0.1849  0.9455  -0.0179
s.e.  0.1207   0.1429  0.1182   0.0767  0.0402   0.0674

sigma^2 = 0.00108:  log likelihood = 280.03
AIC=-546.07   AICc=-545.22   BIC=-525.47

PLATfor <- forecast(PLATfit, 
                    h = 10, 
                    gc.order = c(3, 0, 2))
plot(PLATfor)

auto.arima(sPLATfit$gc,stepwise = FALSE,approximation = FALSE)

Series: sPLATfit$gc 
ARIMA(1,0,4) with zero mean 

Coefficients:
         ar1      ma1     ma2     ma3      ma4
      0.9373  -0.1245  0.3112  0.1378  -0.2370
s.e.  0.0349   0.1038  0.1218  0.1031   0.1341

sigma^2 = 0.001099:  log likelihood = 279.33
AIC=-546.67   AICc=-546.04   BIC=-529.02

sPLATfor <- forecast(PLATfit, 
                     h = 10, 
                     gc.order = c(1, 0, 4))
par(mar = c(2,2,2,2))
plot(sPLATfor)