```` # Set initial environment

setwd("/Users/iyeonghan/skku/25-1/Time Series/Exercises")
rm(list = ls(all = TRUE))

Import libraries and data

df <- read.table(file = "2022practice1.csv", sep = ",")
data <- ts(df$V2, start = c(2005,1), end = c(2018, 12), frequency = 12)
time = as.vector(time(data))

library(TSlibrary)
library(itsmr)

## 
## Attaching package: 'itsmr'

## The following objects are masked from 'package:TSlibrary':
## 
##     airpass, season, smooth.exp, smooth.ma, trend

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## 
## Attaching package: 'tseries'

## The following object is masked from 'package:itsmr':
## 
##     arma

library(urca)
library(nortest)

Function for stationary & normality test

sstest <- function(model) {
  # ADF Test
  adf <- adf.test(model, alternative = "stationary")
  adf_stat <- adf$statistic
  adf_pval <- adf$p.value
  
  # KPSS Test
  kpss <- kpss.test(model, null = "Level")
  kpss_stat <- kpss$statistic
  kpss_pval <- kpss$p.value
  
  # PP Test
  pp <- pp.test(model)
  pp_stat <- pp$statistic
  pp_pval <- pp$p.value
  
  sig_star <- function(p) {
    if (is.na(p)) return("")
    else if (p <= 0.001) return("***")
    else if (p <= 0.01)  return("**")
    else if (p <= 0.05)  return("*")
    else if (p <= 0.1)   return(".")
    else return("")
    }

  result <- data.frame(
    Test = c("ADF", "KPSS", "PP"),
    `Test Statistic` = round(c(adf_stat, kpss_stat, pp_stat), 4),
    `p-value` = signif(c(adf_pval, kpss_pval, pp_pval), 4),
    `Signif.` = sapply(c(adf_pval, kpss_pval, pp_pval), sig_star),
    `Null Hypothesis` = c(
      "Unit root (non-stationary)",
      "Stationarity",
      "Unit root (non-stationary)"
    )
  )
  
  
  return(result)
}

ntests <- function(model) {

  jb <- jarque.bera.test(model)
  shapiro <- shapiro.test(model)
  ad <- ad.test(model)
  cvm <- cvm.test(model)
  lillie <- lillie.test(model)

  sig_star <- function(p) {
    if (is.na(p)) return("")
    else if (p <= 0.001) return("***")
    else if (p <= 0.01)  return("**")
    else if (p <= 0.05)  return("*")
    else if (p <= 0.1)   return(".")
    else return("")
  }
  
  result <- data.frame(
    Test = c("Jarque-Bera", "Shapiro-Wilk", "Anderson-Darling", "Cramer-von Mises", "Lilliefors"),
    `Test Statistic` = round(c(jb$statistic, shapiro$statistic, ad$statistic, cvm$statistic, lillie$statistic), 4),
    `p-value` = signif(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), 4),
    `Signif.` = sapply(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), sig_star),
    `Null Hypothesis` = "Normality"
  )
  
  return(result)
}

Time plot and Correlogram

plot.ts(data, main = "Given Data")

n <- length(data)
acf2(data, lag.max = n / 4); title("Correlogram of Given Data")

# Model 1: Regression Based ### Regression to remove trend

x <- seq(from = 1, to = n, by = 1)
x2 <- x^2
x3 <- x^3
x4 <- x^4

out.lm1 <- lm(data ~ 1 + x)
summary(out.lm1)

## 
## Call:
## lm(formula = data ~ 1 + x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.393  -4.889  -1.447   4.421  13.704 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.174257   0.930809   41.01   <2e-16 ***
## x            0.164442   0.009554   17.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.005 on 166 degrees of freedom
## Multiple R-squared:  0.6409, Adjusted R-squared:  0.6387 
## F-statistic: 296.3 on 1 and 166 DF,  p-value: < 2.2e-16

out.lm2 <- lm(data ~ 1 + x + x2)
summary(out.lm2)

## 
## Call:
## lm(formula = data ~ 1 + x + x2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.354 -4.886 -1.320  4.532 13.378 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 36.2749377  1.3971038  25.964  < 2e-16 ***
## x            0.2314765  0.0381691   6.065 8.74e-09 ***
## x2          -0.0003967  0.0002188  -1.813   0.0716 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.964 on 165 degrees of freedom
## Multiple R-squared:  0.6479, Adjusted R-squared:  0.6436 
## F-statistic: 151.8 on 2 and 165 DF,  p-value: < 2.2e-16

out.lm3 <- lm(data ~ 1 + x + x2 + x3)
summary(out.lm3)

## 
## Call:
## lm(formula = data ~ 1 + x + x2 + x3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.692 -4.847 -1.320  4.372 12.313 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.877e+01  1.866e+00  20.777   <2e-16 ***
## x            5.718e-02  9.533e-02   0.600   0.5495    
## x2           2.174e-03  1.309e-03   1.661   0.0986 .  
## x3          -1.014e-05  5.091e-06  -1.992   0.0480 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.912 on 164 degrees of freedom
## Multiple R-squared:  0.6562, Adjusted R-squared:  0.6499 
## F-statistic: 104.4 on 3 and 164 DF,  p-value: < 2.2e-16

out.lm4 <- lm(data ~ 1 + x + x2 + x3 + x4)
summary(out.lm4)

## 
## Call:
## lm(formula = data ~ 1 + x + x2 + x3 + x4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.585 -4.608 -1.433  4.425 12.105 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.789e+01  2.369e+00  15.992   <2e-16 ***
## x            1.587e-01  1.930e-01   0.822    0.412    
## x2          -5.108e-04  4.625e-03  -0.110    0.912    
## x3           1.452e-05  4.106e-05   0.354    0.724    
## x4          -7.297e-08  1.205e-07  -0.605    0.546    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.923 on 163 degrees of freedom
## Multiple R-squared:  0.657,  Adjusted R-squared:  0.6486 
## F-statistic: 78.05 on 4 and 163 DF,  p-value: < 2.2e-16

plot.ts(data, main = "Regression with Order 1")
lines(time, out.lm1$fitted.values, col = "red")

plot.ts(data, main = "Regression with Order 2")
lines(time, out.lm2$fitted.values, col = "red")

plot.ts(data, main = "Regression with Order 3")
lines(time, out.lm3$fitted.values, col = "red")

plot.ts(data, main = "Regression with Order 4")
lines(time, out.lm4$fitted.values, col = "red")

par(mfrow=c(2,2))
plot(out.lm1)

plot(out.lm2)

plot(out.lm3)

plot(out.lm4)

par(mfrow=c(1,1))

lms <- list(out.lm1, out.lm2, out.lm3, out.lm4)

aic.lms <- numeric(4)
bic.lms <- numeric(4)
d <- 0
for (lm in lms) {
  d <- d + 1
  aic.lms[d] <- AIC(lm)
  bic.lms[d] <- BIC(lm) 
}

y_min <- min(aic.lms, bic.lms)
y_max <- max(aic.lms, bic.lms)

plot(1:4, aic.lms, type="b", col="blue", pch=19, 
     xlab="Regression Order", ylab="Criterion Value",
     main="AIC and BIC of Regression",
     ylim=c(y_min, y_max))

lines(1:4, bic.lms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

d.aic <- aic.lms - min(aic.lms)
d.bic <- bic.lms - min(bic.lms)

y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)

plot(1:4, d.aic, type="b", col="blue", pch=19, 
     xlab="Regression Order", ylab="Difference from Minimum",
     main="Difference of AIC and BIC",
     ylim=c(y_min, y_max))

lines(1:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)

acf2(out.lm2$residuals, lag.max = n/4); title("Correlogram after Removing Trend by Reg 1")

### Harmonic Regression

t <- 1:n; f1 <- floor(n/12); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1

cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)

out.hlm1 <- lm(data ~ 1 + cos1 + sin1)
out.hlm2 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2)
out.hlm3 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
out.hlm4 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)

hlms <- list(out.hlm1, out.hlm2, out.hlm3, out.hlm4)

plot.ts(data - out.lm1$fitted.values, main = "Harmonic Regression Results", ylab = "Fitted Values")
lines(time, out.hlm1$fitted.values - mean(out.hlm1$fitted.values), col = "red")
lines(time, out.hlm2$fitted.values - mean(out.hlm2$fitted.values), col = "blue")

#lines(time, out.hlm3$fitted.values - mean(out.hlm3$fitted.values), col = "blue")
#lines(time, out.hlm2$fitted.values - mean(out.hlm2$fitted.values), col = "purple")

legend("topright", legend=c("k = 1", "k = 2"), col=c("red", "blue"), pch=19, lty=1)

#harmonic regression model selection by Information Criteria

aic.hlms <- numeric(4)
bic.hlms <- numeric(4)
k <- 0
for (hlm in hlms) {
  k <- k + 1
  aic.hlms[k] <- AIC(hlm)
  bic.hlms[k] <- BIC(hlm) 
}

y_min <- min(aic.hlms, bic.hlms)
y_max <- max(aic.hlms, bic.hlms)

plot(1:4, aic.hlms, type="b", col="blue", pch=19, 
     xlab="Harmonic terms (k)", ylab="Criterion Value",
     main="AIC and BIC of Harmonic Regression",
     ylim=c(y_min, y_max))

lines(1:4, bic.hlms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

d.aic <- aic.hlms - min(aic.hlms)
d.bic <- bic.hlms - min(bic.hlms)

y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)

plot(1:4, d.aic, type="b", col="blue", pch=19, 
     xlab="Harmonic terms (k)", ylab="Difference from Minimum",
     main="Difference of AIC and BIC",
     ylim=c(y_min, y_max))

lines(1:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)

model1 <- data - out.lm1$fitted.values - (out.hlm1$fitted.values - mean(out.hlm1$fitted.values))
plot.ts(model1, main = "Regression Based Model")

sstest(model1)

## Warning in pp.test(model): p-value smaller than printed p-value

##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -2.3610 0.42560        
## KPSS Level             KPSS         0.3531 0.09739       .
## Dickey-Fuller Z(alpha)   PP      -127.7105 0.01000      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)

acf2(model1, lag.max = n/4); title("Correlogram of Reg Based")

test(model1)

## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    236.38         0 *
## McLeod-Li Q                Q ~ chisq(20)     51.81     1e-04 *
## Turning points T  (T-110.7)/5.4 ~ N(0,1)       117    0.2439
## Diff signs S       (S-83.5)/3.8 ~ N(0,1)        70     3e-04 *
## Rank P           (P-7014)/364.5 ~ N(0,1)      7338    0.3741

#Normality Tests
ntests(model1)

##               Test Test.Statistic p.value Signif. Null.Hypothesis
## 1      Jarque-Bera         1.1123  0.5734               Normality
## 2     Shapiro-Wilk         0.9946  0.8019               Normality
## 3 Anderson-Darling         0.2011  0.8798               Normality
## 4 Cramer-von Mises         0.0292  0.8567               Normality
## 5       Lilliefors         0.0315  0.9511               Normality

Model 2: Smoothing Based

cd <- classical(data, d = 12, order = 1)

out = classical(data, d=12, order=1);
par(mfrow=c(2,2))
plot.ts(data);
title("step1")
lines(time, cd$m1, col="red");
plot.ts(data-cd$m1);
title("step2")
lines(time, cd$st, col="red");
plot.ts(data-cd$st);
title("step3")
lines(time, cd$m, col="red");
plot.ts(data);
lines(time, cd$fit, col="red")
title("Final")

model2 <- cd$resi
par(mfrow = c(1,1))
plot.ts(model2, main = "Smoothing Based")

acf2(model2, lag.max = n/4); title("Correlogram of Model 2")

sstest(model2)

## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value

##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -2.5195 0.35940        
## KPSS Level             KPSS         0.3233 0.10000       .
## Dickey-Fuller Z(alpha)   PP       -24.1594 0.02422       *
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)

test(model2)

## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    643.14         0 *
## McLeod-Li Q                Q ~ chisq(20)    365.57         0 *
## Turning points T  (T-110.7)/5.4 ~ N(0,1)        98    0.0198 *
## Diff signs S       (S-83.5)/3.8 ~ N(0,1)        79    0.2305
## Rank P           (P-7014)/364.5 ~ N(0,1)      7058    0.9039

# Normality Test
ntests(model2)

##               Test Test.Statistic   p.value Signif. Null.Hypothesis
## 1      Jarque-Bera         2.6773 0.2622000               Normality
## 2     Shapiro-Wilk         0.9790 0.0119800       *       Normality
## 3 Anderson-Darling         1.3343 0.0017910      **       Normality
## 4 Cramer-von Mises         0.2156 0.0033680      **       Normality
## 5       Lilliefors         0.0992 0.0003618     ***       Normality

Model3: Differencing Based

sd <- diff(data, 12)
plot.ts(sd, main = "Seasonal Differenced")

acf2(sd, lag.max = n/4); title("Correlogram after Seasonal Diff")

model3 <- diff(sd)
plot.ts(model3, main = "Diff Based")

sstest(model3)

## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value

## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value

## Warning in pp.test(model): p-value smaller than printed p-value

##                        Test Test.Statistic p.value Signif.
## Dickey-Fuller           ADF        -5.3156    0.01      **
## KPSS Level             KPSS         0.0369    0.10       .
## Dickey-Fuller Z(alpha)   PP      -180.6863    0.01      **
##                                   Null.Hypothesis
## Dickey-Fuller          Unit root (non-stationary)
## KPSS Level                           Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)

test(model3)

## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     77.33         0 *
## McLeod-Li Q                Q ~ chisq(20)     23.24    0.2772
## Turning points T    (T-102)/5.2 ~ N(0,1)       105    0.5654
## Diff signs S         (S-77)/3.6 ~ N(0,1)        73    0.2673
## Rank P         (P-5967.5)/323.2 ~ N(0,1)      5897    0.8273

#Normality tests

ntests(model3)

##               Test Test.Statistic p.value Signif. Null.Hypothesis
## 1      Jarque-Bera         5.4396 0.06589       .       Normality
## 2     Shapiro-Wilk         0.9838 0.06587       .       Normality
## 3 Anderson-Darling         0.5067 0.19810               Normality
## 4 Cramer-von Mises         0.0594 0.38270               Normality
## 5       Lilliefors         0.0404 0.77670               Normality

Transform to Rmd

library(knitr)
knitr::spin("2022 MIDTERM.R", knit = FALSE)

## [1] "2022 MIDTERM.Rmd"

2022 MIDTERM

Lee Younghan

Import libraries and data

Function for stationary & normality test

Time plot and Correlogram

Model 2: Smoothing Based

Model3: Differencing Based