rm(list = ls(all = TRUE))
setwd("/Users/iyeonghan/skku/25-1/Time Series/Exercises")

library(itsmr)
library(TSlibrary)
## 
## Attaching package: 'TSlibrary'
## The following objects are masked from 'package:itsmr':
## 
##     airpass, season, smooth.exp, smooth.ma, trend
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'tseries'
## The following object is masked from 'package:itsmr':
## 
##     arma
library(urca)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#Import data
data <- read.table("~/skku/25-1/Time Series/Exercises/2022practice1.csv", quote="\"", comment.char="")
data <- ts(data, start = c(2005, 1), end = c(2018, 12), frequency = 12)
time = as.vector(time(data))

#Time plot and correlogram
plot.ts(data, main = "Data", ylab = "Values")

#d = 12
n <- length(data)
acf2(data, lag.max = n/4)

# Model 1, to stabilize mean and variance, take log and diff(1)
log.data <- log(data)
plot.ts(log.data)

model1 <- diff(log.data)
plot.ts(model1)

out.adf1 <- ur.df(model1, type = "drift", selectlags = "AIC")
summary(out.adf1)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3506  0.0102  0.0174  0.0404  0.9841 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0003589  0.0378374   0.009    0.992    
## z.lag.1     -1.2619790  0.1076246 -11.726   <2e-16 ***
## z.diff.lag   0.0579507  0.0638605   0.907    0.366    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4856 on 162 degrees of freedom
## Multiple R-squared:  0.599,  Adjusted R-squared:  0.594 
## F-statistic:   121 on 2 and 162 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -11.7257 68.8062 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
acf2(model1, lag.max = n / 4); title("Correlogram of Model 1")

test(model1)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)     25.09    0.1981
## McLeod-Li Q                Q ~ chisq(20)     24.03    0.2411
## Turning points T    (T-110)/5.4 ~ N(0,1)        32         0 *
## Diff signs S         (S-83)/3.7 ~ N(0,1)        16         0 *
## Rank P         (P-6930.5)/361.3 ~ N(0,1)      6167    0.0346 *

jarque.bera.test(model1)
## 
##  Jarque Bera Test
## 
## data:  model1
## X-squared = 14620, df = 2, p-value < 2.2e-16
# Model 2, to consider seasonality, use seasonal diff to log.data
model2 <- diff(log.data, 12)
plot.ts(model2, main = "Model 2")

out.adf2 <- ur.df(model2, type = "drift", selectlags = "AIC")
summary(out.adf2)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0249  0.0068  0.0134  0.0331  4.2591 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.007193   0.047034   0.153 0.878656    
## z.lag.1     -0.174608   0.048824  -3.576 0.000469 ***
## z.diff.lag  -0.045371   0.070923  -0.640 0.523328    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5831 on 151 degrees of freedom
## Multiple R-squared:  0.09474,    Adjusted R-squared:  0.08275 
## F-statistic: 7.902 on 2 and 151 DF,  p-value: 0.0005449
## 
## 
## Value of test-statistic is: -3.5763 6.3961 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
acf2(model2, lag.max = n / 4); title("Correlogram of Model 2")

test(model2)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    287.85         0 *
## McLeod-Li Q                Q ~ chisq(20)    280.92         0 *
## Turning points T  (T-102.7)/5.2 ~ N(0,1)        28         0 *
## Diff signs S       (S-77.5)/3.6 ~ N(0,1)        27         0 *
## Rank P           (P-6045)/326.3 ~ N(0,1)      6064    0.9536

jarque.bera.test(model2)
## 
##  Jarque Bera Test
## 
## data:  model2
## X-squared = 281.38, df = 2, p-value < 2.2e-16
# Model 3, diff(1)
model3 <- diff(data)
plot.ts(model3, main = "Model 3")

out.adf3 <- ur.df(model3, type = "drift", selectlags = "AIC")
summary(out.adf3)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -164.854    1.146    1.146    1.186   13.414 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.08156    1.41096  -0.058    0.954    
## z.lag.1     -1.06411    0.10909  -9.755   <2e-16 ***
## z.diff.lag   0.02358    0.07412   0.318    0.751    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.12 on 162 degrees of freedom
## Multiple R-squared:  0.5209, Adjusted R-squared:  0.5149 
## F-statistic: 88.05 on 2 and 162 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -9.7548 47.5996 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
acf2(model3, lag.max = n/4); title("Correlogram of Model 3")

test(model3)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)      8.13     0.991
## McLeod-Li Q                Q ~ chisq(20)      2.06         1
## Turning points T    (T-110)/5.4 ~ N(0,1)        16         0 *
## Diff signs S         (S-83)/3.7 ~ N(0,1)        16         0 *
## Rank P         (P-6930.5)/361.3 ~ N(0,1)      1126         0 *

jarque.bera.test(model3)
## 
##  Jarque Bera Test
## 
## data:  model3
## X-squared = 27977, df = 2, p-value < 2.2e-16
# Model 4, ma filter
hopt = opt.ma.cv(interval=c(5, floor(n/2)), tol=.1e-20, data=data, l=1)
hopt$h.ma;
## [1] 6.039294
plot.ts(data, main = "MA(optimal q)", ylab = "values")
lines(time, hopt$out.ma, col = "red")

model4 <- data - hopt$out.ma
plot.ts(model4, main = "Model 4")

out.adf4 <- ur.df(model4, type = "drift", selectlags = "AIC")
summary(out.adf4)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -120.290   -0.455   -0.322   -0.173   41.455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.28604    1.21470   0.235   0.8141    
## z.lag.1     -0.46604    0.06961  -6.695 3.31e-10 ***
## z.diff.lag   0.13787    0.07400   1.863   0.0642 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.63 on 163 degrees of freedom
## Multiple R-squared:  0.2218, Adjusted R-squared:  0.2123 
## F-statistic: 23.23 on 2 and 163 DF,  p-value: 1.33e-09
## 
## 
## Value of test-statistic is: -6.695 22.4136 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
acf2(model4, lag.max = n / 4)

test(model4)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    133.81         0 *
## McLeod-Li Q                Q ~ chisq(20)    343.65         0 *
## Turning points T  (T-110.7)/5.4 ~ N(0,1)        35         0 *
## Diff signs S       (S-83.5)/3.8 ~ N(0,1)        44         0 *
## Rank P           (P-7014)/364.5 ~ N(0,1)      6605    0.2619

t <- 1:n; f1 <- floor(n/12); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1

cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)

out.hlm1 <- lm(data ~ 1 + cos1 + sin1)
out.hlm2 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2)
out.hlm3 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
out.hlm4 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)

hlms <- list(out.hlm1, out.hlm2, out.hlm3, out.hlm4)

aic.hlms <- numeric(4)
bic.hlms <- numeric(4)
k <- 0
for (hlm in hlms) {
  k <- k + 1
  aic.hlms[k] <- AIC(hlm)
  bic.hlms[k] <- BIC(hlm) 
}

y_min <- min(aic.hlms, bic.hlms)
y_max <- max(aic.hlms, bic.hlms)

plot(1:4, aic.hlms, type="b", col="blue", pch=19, 
     xlab="Harmonic terms (k)", ylab="Criterion Value",
     main="AIC and BIC of Harmonic Regression",
     ylim=c(y_min, y_max))

lines(1:4, bic.hlms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

plot.ts(model4, main = "Harmonic Reg", ylab = "data")
lines(time, out.hlm1$fitted.values - mean(out.hlm1$fitted.values), col = "red")

model5 <- model4 - (out.hlm1$fitted.values - mean(out.hlm1$fitted.values))
plot.ts(model5, main = "model5")

out.adf5 <- ur.df(model5, type = "drift", selectlags = "AIC")
summary(out.adf5)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -119.975   -0.915   -0.194    0.382   41.482 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.27932    1.21498   0.230    0.818    
## z.lag.1     -0.46631    0.06960  -6.700 3.22e-10 ***
## z.diff.lag   0.13860    0.07405   1.872    0.063 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.64 on 163 degrees of freedom
## Multiple R-squared:  0.2221, Adjusted R-squared:  0.2125 
## F-statistic: 23.27 on 2 and 163 DF,  p-value: 1.293e-09
## 
## 
## Value of test-statistic is: -6.7003 22.4491 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1  6.52  4.63  3.81
acf2(model5, lag.max = n/4)

test(model5)
## Null hypothesis: Residuals are iid noise.
## Test                        Distribution Statistic   p-value
## Ljung-Box Q                Q ~ chisq(20)    135.29         0 *
## McLeod-Li Q                Q ~ chisq(20)     344.7         0 *
## Turning points T  (T-110.7)/5.4 ~ N(0,1)        40         0 *
## Diff signs S       (S-83.5)/3.8 ~ N(0,1)        96     9e-04 *
## Rank P           (P-7014)/364.5 ~ N(0,1)      6492    0.1521

library(knitr)
knitr::spin("22mid.R", knit = FALSE)
## [1] "22mid.Rmd"