Initial setting of wd

setwd("/Users/iyeonghan/skku/25-1/Time Series/Final/실습")
rm(list = ls(all = TRUE))

import libraries

library(TSlibrary)
library(itsmr)
## 
## Attaching package: 'itsmr'
## The following objects are masked from 'package:TSlibrary':
## 
##     airpass, season, smooth.exp, smooth.ma, trend
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'tseries'
## The following object is masked from 'package:itsmr':
## 
##     arma
library(urca)
library(nortest)
library(forecast)
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:itsmr':
## 
##     forecast
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:itsmr':
## 
##     deaths

Define functions

sstest <- function(model) {
  # ADF Test
  adf <- adf.test(model, alternative = "stationary")
  adf_stat <- adf$statistic
  adf_pval <- adf$p.value
  
  # KPSS Test
  kpss <- kpss.test(model, null = "Level")
  kpss_stat <- kpss$statistic
  kpss_pval <- kpss$p.value
  
  # PP Test
  pp <- pp.test(model)
  pp_stat <- pp$statistic
  pp_pval <- pp$p.value
  
  sig_star <- function(p) {
    if (is.na(p)) return("")
    else if (p <= 0.001) return("***")
    else if (p <= 0.01)  return("**")
    else if (p <= 0.05)  return("*")
    else if (p <= 0.1)   return(".")
    else return("")
  }
  
  result <- data.frame(
    Test = c("ADF", "KPSS", "PP"),
    `Test Statistic` = round(c(adf_stat, kpss_stat, pp_stat), 4),
    `p-value` = signif(c(adf_pval, kpss_pval, pp_pval), 4),
    `Signif.` = sapply(c(adf_pval, kpss_pval, pp_pval), sig_star),
    `Null Hypothesis` = c(
      "Unit root (non-stationary)",
      "Stationarity",
      "Unit root (non-stationary)"
    )
  )
  
  
  return(result)
}

ntests <- function(model) {
  
  jb <- jarque.bera.test(model)
  shapiro <- shapiro.test(model)
  ad <- ad.test(model)
  cvm <- cvm.test(model)
  lillie <- lillie.test(model)
  
  sig_star <- function(p) {
    if (is.na(p)) return("")
    else if (p <= 0.001) return("***")
    else if (p <= 0.01)  return("**")
    else if (p <= 0.05)  return("*")
    else if (p <= 0.1)   return(".")
    else return("")
  }
  
  result <- data.frame(
    Test = c("Jarque-Bera", "Shapiro-Wilk", "Anderson-Darling", "Cramer-von Mises", "Lilliefors"),
    `Test Statistic` = round(c(jb$statistic, shapiro$statistic, ad$statistic, cvm$statistic, lillie$statistic), 4),
    `p-value` = signif(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), 4),
    `Signif.` = sapply(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), sig_star),
    `Null Hypothesis` = "Normality"
  )
  
  return(result)
}

load data

data <- read.csv("2024practice2.csv", header = FALSE)
data <- ts(data, start = c(1924,01), end = c(2023, 12), frequency = 12)
time <- as.vector(time(data))
n <- length(data)

1.

(a) Time plot and Correlogram

plot.ts(data, main = "Given Data")

acf2(data, lag.max = 50); title("Correlogram")

### (b) constant variance but linear trend exists, so the data is not stationary. Also, the seasonality with period 12 is included ###(c)

x <- seq(from = 1, to = n, by = 1)
reg1 <- lm(data ~ 1 + x)
resid1 <- reg1$residuals

to capture seasonality, use cos and sin function.

t <- 1:n; f1 <- floor(n/12); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1; f5 <- 5*f1; f6 <- 6*f1
cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)
cos5 <- cos(f5*2*pi/n*t); sin5 <- sin(f5*2*pi/n*t)
cos6 <- cos(f6*2*pi/n*t); sin6 <- sin(f6*2*pi/n*t)

reg2 <- lm(data ~ 1 + x + cos1 + sin1)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg2$fitted.values, col = "red")

resid2 <- reg2$residuals

reg3 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg3$fitted.values, col = "red")

resid3 <- reg3$residuals

reg4 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg4$fitted.values, col = "red")

resid4 <- reg4$residuals

reg5 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg5$fitted.values, col = "red")

resid5 <- reg5$residuals

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid1, main = "residual from reg1")
acf2(resid1, lag.max = 100); title("ACF")
Pacf(resid1, lag=n/4, main = "PACF")

sstest(resid1)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -9.8630    0.01      **
## KPSS Level             KPSS         0.0049    0.10       .
## Dickey-Fuller Z(alpha)   PP      -223.9978    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid1)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)   11318.8         0 *
## McLeod-Li Q                Q ~ chisq(20)   7621.96         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       217         0 *
## Diff signs S       (S-599.5)/10 ~ N(0,1)       664         0 *
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359314    0.9556

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid2, main = "residual from reg2")
acf2(resid2, lag.max = 100); title("ACF")
Pacf(resid2, lag=n/4, main = "PACF")

sstest(resid2)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF       -10.0202    0.01      **
## KPSS Level             KPSS         0.0905    0.10       .
## Dickey-Fuller Z(alpha)   PP      -808.2333    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid2)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    454.06         0 *
## McLeod-Li Q                Q ~ chisq(20)     49.64     2e-04 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       691         0 *
## Diff signs S       (S-599.5)/10 ~ N(0,1)       588    0.2503
## Rank P        (P-359700)/6932.5 ~ N(0,1)    358805    0.8973

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid3, main = "residual from reg3")
acf2(resid3, lag.max = 100); title("ACF")
Pacf(resid3, lag=n/4, main = "PACF")

sstest(resid3)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -9.8230    0.01      **
## KPSS Level             KPSS         0.0963    0.10       .
## Dickey-Fuller Z(alpha)   PP     -1005.1273    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid3)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    130.93         0 *
## McLeod-Li Q                Q ~ chisq(20)     46.59     7e-04 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       731         0 *
## Diff signs S       (S-599.5)/10 ~ N(0,1)       595    0.6528
## Rank P        (P-359700)/6932.5 ~ N(0,1)    360625    0.8939

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid4, main = "residual from reg4")
acf2(resid4, lag.max = 100); title("ACF")
Pacf(resid4, lag=n/4, main = "PACF")

sstest(resid4)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -9.5873    0.01      **
## KPSS Level             KPSS         0.0955    0.10       .
## Dickey-Fuller Z(alpha)   PP     -1019.0243    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid4)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)      95.9         0 *
## McLeod-Li Q                Q ~ chisq(20)     62.43         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       763    0.0145 *
## Diff signs S       (S-599.5)/10 ~ N(0,1)       598    0.8808
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359848     0.983

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid5, main = "residual from reg5")
acf2(resid5, lag.max = 100); title("ACF")
Pacf(resid5, lag=100, main = "PACF")

sstest(resid5)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -9.4400    0.01      **
## KPSS Level             KPSS         0.0958    0.10       .
## Dickey-Fuller Z(alpha)   PP      -987.9755    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid5)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    101.36         0 *
## McLeod-Li Q                Q ~ chisq(20)     69.18         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       773    0.0786
## Diff signs S       (S-599.5)/10 ~ N(0,1)       605    0.5825
## Rank P        (P-359700)/6932.5 ~ N(0,1)    360963    0.8554

summary(reg4)
## 
## Call:
## lm(formula = data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + 
##     sin3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.16080 -0.18999  0.00499  0.18408  1.11551 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.392e+01  1.711e-02 -813.735  < 2e-16 ***
## x            2.804e-02  2.467e-05 1136.446  < 2e-16 ***
## cos1        -2.367e+00  1.209e-02 -195.805  < 2e-16 ***
## sin1        -1.839e+00  1.209e-02 -152.153  < 2e-16 ***
## cos2        -2.086e-01  1.209e-02  -17.259  < 2e-16 ***
## sin2        -4.204e-02  1.209e-02   -3.478 0.000523 ***
## cos3         1.080e-01  1.209e-02    8.937  < 2e-16 ***
## sin3        -6.197e-02  1.209e-02   -5.127 3.44e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2961 on 1192 degrees of freedom
## Multiple R-squared:  0.9991, Adjusted R-squared:  0.9991 
## F-statistic: 1.936e+05 on 7 and 1192 DF,  p-value: < 2.2e-16
summary(reg5)
## 
## Call:
## lm(formula = data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + 
##     sin3 + cos4 + sin4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15401 -0.18569  0.00561  0.17677  1.12239 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.392e+01  1.689e-02 -824.030  < 2e-16 ***
## x            2.804e-02  2.437e-05 1150.821  < 2e-16 ***
## cos1        -2.367e+00  1.194e-02 -198.281  < 2e-16 ***
## sin1        -1.839e+00  1.194e-02 -154.077  < 2e-16 ***
## cos2        -2.086e-01  1.194e-02  -17.477  < 2e-16 ***
## sin2        -4.204e-02  1.194e-02   -3.522 0.000444 ***
## cos3         1.080e-01  1.194e-02    9.050  < 2e-16 ***
## sin3        -6.197e-02  1.194e-02   -5.191 2.45e-07 ***
## cos4        -5.508e-02  1.194e-02   -4.615 4.36e-06 ***
## sin4        -3.967e-02  1.194e-02   -3.323 0.000917 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2924 on 1190 degrees of freedom
## Multiple R-squared:  0.9991, Adjusted R-squared:  0.9991 
## F-statistic: 1.544e+05 on 9 and 1190 DF,  p-value: < 2.2e-16
lms <- list(reg1, reg2, reg3, reg4, reg5)

aic.lms <- numeric(5)
bic.lms <- numeric(5)
d <- 0
for (lm in lms) {
  d <- d + 1
  aic.lms[d] <- AIC(lm)
  bic.lms[d] <- BIC(lm) 
}

y_min <- min(aic.lms, bic.lms)
y_max <- max(aic.lms, bic.lms)

plot(0:4, aic.lms, type="b", col="blue", pch=19, 
     xlab="Harmonic term order", ylab="Criterion Value",
     main="AIC and BIC of Regression",
     ylim=c(y_min, y_max))

lines(0:4, bic.lms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

d.aic <- aic.lms - min(aic.lms)
d.bic <- bic.lms - min(bic.lms)

y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)

plot(0:4, d.aic, type="b", col="blue", pch=19, 
     xlab="Harmonic term order", ylab="Difference from Minimum",
     main="Difference of AIC and BIC",
     ylim=c(y_min, y_max))

lines(0:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)

### In terms of both AIC and BIC, the regression with linear trend and 4th order harmonic series is the best. ### From SACF and SPACF of the residuals, error~ARMA(1,3)

ARMA(1,1) ~ ARMA(1,3)

X <- cbind(x, cos1, sin1, cos2, sin2, cos3, sin3, cos4, sin4)
arma0 <- Arima(data, order = c(1,0,0), xreg = X, include.mean = TRUE)
arma0
## Series: data 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  intercept      x     cos1     sin1     cos2     sin2    cos3
##       0.2317   -13.9192  0.028  -2.3667  -1.8391  -0.2087  -0.0420  0.1080
## s.e.  0.0281     0.0213  0.000   0.0143   0.0143   0.0128   0.0128  0.0113
##          sin3     cos4     sin4
##       -0.0620  -0.0551  -0.0397
## s.e.   0.0113   0.0102   0.0102
## 
## sigma^2 = 0.08096:  log likelihood = -188.95
## AIC=401.9   AICc=402.16   BIC=462.98
arma1 <- Arima(data, order = c(1,0,1), xreg = X, include.mean = TRUE)
arma1
## Series: data 
## Regression with ARIMA(1,0,1) errors 
## 
## Coefficients:
##          ar1      ma1  intercept      x     cos1     sin1     cos2     sin2
##       0.4929  -0.2775   -13.9195  0.028  -2.3668  -1.8391  -0.2086  -0.0420
## s.e.  0.1007   0.1113     0.0232  0.000   0.0143   0.0143   0.0119   0.0119
##         cos3     sin3     cos4     sin4
##       0.1080  -0.0619  -0.0552  -0.0396
## s.e.  0.0107   0.0107   0.0102   0.0102
## 
## sigma^2 = 0.08068:  log likelihood = -186.35
## AIC=398.71   AICc=399.01   BIC=464.88
arma2 <- Arima(data, order = c(1,0,2), xreg = X, include.mean = TRUE)
arma2
## Series: data 
## Regression with ARIMA(1,0,2) errors 
## 
## Coefficients:
##          ar1     ma1      ma2  intercept      x     cos1     sin1     cos2
##       0.5159  -0.300  -0.0078   -13.9195  0.028  -2.3667  -1.8390  -0.2086
## s.e.  0.2030   0.204   0.0619     0.0233  0.000   0.0142   0.0142   0.0119
##          sin2    cos3     sin3     cos4     sin4
##       -0.0419  0.1080  -0.0619  -0.0551  -0.0397
## s.e.   0.0119  0.0108   0.0108   0.0102   0.0102
## 
## sigma^2 = 0.08074:  log likelihood = -186.34
## AIC=400.69   AICc=401.04   BIC=471.95
arma3 <- Arima(data, order = c(1,0,3), xreg = X, include.mean = TRUE)
arma3
## Series: data 
## Regression with ARIMA(1,0,3) errors 
## 
## Coefficients:
##          ar1     ma1     ma2     ma3  intercept      x     cos1     sin1
##       -0.454  0.6744  0.1991  0.1199   -13.9195  0.028  -2.3668  -1.8391
## s.e.   0.222  0.2208  0.0580  0.0294     0.0223  0.000   0.0147   0.0147
##          cos2     sin2    cos3     sin3     cos4     sin4
##       -0.2086  -0.0419  0.1079  -0.0620  -0.0551  -0.0397
## s.e.   0.0121   0.0121  0.0102   0.0102   0.0106   0.0106
## 
## sigma^2 = 0.08045:  log likelihood = -183.67
## AIC=397.34   AICc=397.75   BIC=473.69
armas <- list(arma0, arma1, arma2, arma3)

aicc.armas <- numeric(4)
bic.armas <- numeric(4)
k <- 0
for (mod in armas) {
  k <- k + 1
  aicc.armas[k] <- mod$aicc
  bic.armas[k] <- mod$bic
}

y_min <- min(aicc.armas, bic.armas)
y_max <- max(aicc.armas, bic.armas)

par(mfrow = c(1,2))

plot(0:3, aicc.armas, type="b", col="blue", pch=19, 
     xlab="q", ylab="Criterion Value",
     main="AICc and BIC of ARMA(1,q)",
     ylim=c(y_min, y_max))

lines(0:3, bic.armas, type="b", col="red", pch=19)
legend("topright", legend=c("AICc", "BIC"), col=c("blue", "red"), pch=19, lty=1)

d.aicc <- aicc.armas - min(aicc.armas)
d.bic <- bic.armas - min(bic.armas)

y_min <- min(d.aicc, d.bic)
y_max <- max(d.aicc, d.bic)

plot(0:3, d.aicc, type="b", col="blue", pch=19, 
     xlab="q", ylab="Difference from Minimum",
     main="Difference of AICc and BIC",
     ylim=c(y_min, y_max))

lines(0:3, d.bic, type="b", col="red", pch=19)
abline(h = 2) 
legend("topright", legend=c("AICc - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)

test(residuals(arma0))
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     23.17    0.2804
## McLeod-Li Q                Q ~ chisq(20)     70.67         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       805    0.6643
## Diff signs S       (S-599.5)/10 ~ N(0,1)       613    0.1772
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359292    0.9531

ntests(residuals(arma0))
##               Test Test.Statistic   p.value Signif. Null.Hypothesis
## 1      Jarque-Bera        32.6802 8.009e-08     ***       Normality
## 2     Shapiro-Wilk         0.9944 1.914e-04     ***       Normality
## 3 Anderson-Darling         1.4917 7.571e-04     ***       Normality
## 4 Cramer-von Mises         0.2418 1.627e-03      **       Normality
## 5       Lilliefors         0.0287 2.168e-02       *       Normality
test(residuals(arma1))
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)        17    0.6527
## McLeod-Li Q                Q ~ chisq(20)     71.49         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       797    0.9091
## Diff signs S       (S-599.5)/10 ~ N(0,1)       609    0.3423
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359294    0.9533

ntests(residuals(arma1))
##               Test Test.Statistic   p.value Signif. Null.Hypothesis
## 1      Jarque-Bera        31.4349 1.493e-07     ***       Normality
## 2     Shapiro-Wilk         0.9946 2.798e-04     ***       Normality
## 3 Anderson-Darling         1.4410 1.008e-03      **       Normality
## 4 Cramer-von Mises         0.2286 2.358e-03      **       Normality
## 5       Lilliefors         0.0264 4.857e-02       *       Normality
test(residuals(arma2))
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     17.01    0.6522
## McLeod-Li Q                Q ~ chisq(20)     71.58         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       797    0.9091
## Diff signs S       (S-599.5)/10 ~ N(0,1)       609    0.3423
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359231    0.9461

ntests(residuals(arma2))
##               Test Test.Statistic   p.value Signif. Null.Hypothesis
## 1      Jarque-Bera        31.5165 1.433e-07     ***       Normality
## 2     Shapiro-Wilk         0.9946 2.757e-04     ***       Normality
## 3 Anderson-Darling         1.4427 9.988e-04     ***       Normality
## 4 Cramer-von Mises         0.2293 2.315e-03      **       Normality
## 5       Lilliefors         0.0265 4.679e-02       *       Normality
test(residuals(arma3))
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     12.38    0.9022
## McLeod-Li Q                Q ~ chisq(20)     67.94         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       791    0.5994
## Diff signs S       (S-599.5)/10 ~ N(0,1)       604    0.6528
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359560    0.9839

ntests(residuals(arma3))
##               Test Test.Statistic   p.value Signif. Null.Hypothesis
## 1      Jarque-Bera        31.8217 1.230e-07     ***       Normality
## 2     Shapiro-Wilk         0.9945 2.300e-04     ***       Normality
## 3 Anderson-Darling         1.4396 1.016e-03      **       Normality
## 4 Cramer-von Mises         0.2200 3.019e-03      **       Normality
## 5       Lilliefors         0.0282 2.588e-02       *       Normality

I determine the best model as ARMA(1,1) since in terms of both AICc and BIC the model shows good performance.

Also, the Ljung Box test result says ARMA(1,1) error is WN.

(d) SARIMA

d1.data <- diff(data)
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(d1.data, main = "d = 1")
acf2(d1.data, lag.max = 100); title("ACF")
Pacf(d1.data, lag.max = 100)

D1.data <- diff(data,12)
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(D1.data, main = "D = 1")
acf2(D1.data, lag.max = 100); title("ACF")
Pacf(D1.data, lag.max = 100)

dD1.data <- diff(diff(data,12))
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(dD1.data, main = "d = 1, D = 1")
acf2(dD1.data, lag.max = 100); title("ACF")
Pacf(dD1.data, lag.max = 100)

sstest(d1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF       -39.4140    0.01      **
## KPSS Level             KPSS         0.0063    0.10       .
## Dickey-Fuller Z(alpha)   PP      -259.0648    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
sstest(D1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF       -10.8794    0.01      **
## KPSS Level             KPSS         0.0103    0.10       .
## Dickey-Fuller Z(alpha)   PP     -1013.5712    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
sstest(dD1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF       -10.2797    0.01      **
## KPSS Level             KPSS         0.0075    0.10       .
## Dickey-Fuller Z(alpha)   PP     -1334.3421    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)

From ACF and PACF, d=1, D=1 is the best to fit SARMA

order selection for SARMA

par(mfrow = c(1,1))
acf2(D1.data, lag.max = n/4); title("ACF")

Pacf(D1.data, lag.max = n/4)

init.sarima <- auto.arima(data, d=0, D=1, max.q = 3, max.Q = 1, max.p = 1, max.P = 8, stepwise = FALSE, approximation = FALSE)
sarima1 <- arima(data, order = c(1,0,3), seasonal = list(order = c(2,1,1)))
sarima2 <- arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(2, 1, 0)))
sarima4 <- arima(x = data, order = c(4, 1, 1), seasonal = list(order = c(5, 1, 1)))
sarima7 <- arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(4, 1, 0)))

test(init.sarima$residuals)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     33.03    0.0335 *
## McLeod-Li Q                Q ~ chisq(20)     80.72         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       837    0.0086 *
## Diff signs S       (S-599.5)/10 ~ N(0,1)       603    0.7264
## Rank P        (P-359700)/6932.5 ~ N(0,1)    366088    0.3568

test(sarima1$residuals)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     20.38    0.4346
## McLeod-Li Q                Q ~ chisq(20)     75.76         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       803    0.7665
## Diff signs S       (S-599.5)/10 ~ N(0,1)       615    0.1213
## Rank P        (P-359700)/6932.5 ~ N(0,1)    359657    0.9951

test(sarima2$residuals)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     31.58     0.048 *
## McLeod-Li Q                Q ~ chisq(20)     66.96         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       807     0.568
## Diff signs S       (S-599.5)/10 ~ N(0,1)       611    0.2503
## Rank P        (P-359700)/6932.5 ~ N(0,1)    357897    0.7948

test(sarima4$residuals)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     12.93    0.8802
## McLeod-Li Q                Q ~ chisq(20)     77.18         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       791    0.5994
## Diff signs S       (S-599.5)/10 ~ N(0,1)       614    0.1472
## Rank P        (P-359700)/6932.5 ~ N(0,1)    361764    0.7659

test(sarima7$residuals)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     24.13    0.2366
## McLeod-Li Q                Q ~ chisq(20)      79.8         0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1)       809    0.4789
## Diff signs S       (S-599.5)/10 ~ N(0,1)       611    0.2503
## Rank P        (P-359700)/6932.5 ~ N(0,1)    358472    0.8594

sarimas <- list(init.sarima, sarima1, sarima2, sarima4, sarima7)
aic.sarimas <- numeric(5)
bic.sarimas <- numeric(5)
k <- 0
for (mod in sarimas) {
  k <- k + 1
  aic.sarimas[k] <- AIC(mod)
  bic.sarimas[k] <- BIC(mod)
}

y_min <- min(aic.sarimas, bic.sarimas)
y_max <- max(aic.sarimas, bic.sarimas)

par(mfrow = c(1,2))

plot(0:4, aic.sarimas, type="b", col="blue", pch=19, 
     xlab="p", ylab="Criterion Value",
     main="AIC and BIC of SARIMA(p,1,1)(0,1,1)",
     ylim=c(y_min, y_max))

lines(0:4, bic.sarimas, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

d.aic <- aic.sarimas - min(aic.sarimas)
d.bic <- bic.sarimas - min(bic.sarimas)

y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)

plot(0:4, d.aic, type="b", col="blue", pch=19, 
     xlab="p", ylab="Difference from Minimum",
     main="Difference of AIC and BIC",
     ylim=c(y_min, y_max))

lines(0:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)

sarima1
## 
## Call:
## arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(2, 1, 1)))
## 
## Coefficients:
##       ar1      ma1      ma2      ma3     sar1     sar2     sma1
##         1  -0.7871  -0.1209  -0.0784  -0.0034  -0.0394  -0.9695
## s.e.    0   0.0290   0.0410   0.0303   0.0308   0.0305   0.0138
## 
## sigma^2 estimated as 0.08244:  log likelihood = -224.52,  aic = 465.04
par(mfrow = c(1,1))
fc.sarima <- forecast(sarima1, h = 4)
plot(fc.sarima, xlim = c(2020,2025))

h <- 4
newx <- (n+1):(n+h)
ncos1 <- cos(f1*2*pi/n*newx); nsin1 <- sin(f1*2*pi/n*newx)
ncos2 <- cos(f2*2*pi/n*newx); nsin2 <- sin(f2*2*pi/n*newx)
ncos3 <- cos(f3*2*pi/n*newx); nsin3 <- sin(f3*2*pi/n*newx)
ncos4 <- cos(f4*2*pi/n*newx); nsin4 <- sin(f4*2*pi/n*newx)

fc.reg <- forecast(arma1, h = h, xreg = cbind(x = newx, cos1 = ncos1, sin1 = nsin1, cos2 = ncos2, sin2 = nsin2, cos3 = ncos3, sin3 = nsin3, cos4 = ncos4, sin4 = nsin4))
plot(fc.reg, xlim = c(2020,2025))
lines(time, reg1$fitted.values, col = "red")

library(knitr)
knitr::spin("2024final.R", knit = FALSE)
## [1] "2024final.Rmd"

`````