rm(list = ls(all = TRUE))
setwd("/Users/iyeonghan/skku/25-1/Time Series/Exercises")
## Import packages
library(itsmr)
library(TSlibrary)
##
## Attaching package: 'TSlibrary'
## The following objects are masked from 'package:itsmr':
##
## airpass, season, smooth.exp, smooth.ma, trend
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'tseries'
## The following object is masked from 'package:itsmr':
##
## arma
library(urca)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
data <- read.table("~/skku/25-1/Time Series/Exercises/2024practice1.csv", quote="\"", comment.char="")
data <- ts(data, start = c(2001, 1), end = c(2015, 12), frequency = 12)
time = as.vector(time(data))
## (a)
plot.ts(data, ylab = "Data"); title("Data")

acf2(data, lag.max = 45); title("Correlogram of Data")

## Seasonality: half of year, strong autocorrelation, ...
## (b)
### Model 1: Regression
n <- length(data)
x <- seq(from = 1, to = n, by = 1)
x.sq <- x^2
x.cu <- x^3
x.q <- x^4
out.lm1 <- lm(data ~ 1 + x)
summary(out.lm1)
##
## Call:
## lm(formula = data ~ 1 + x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.898 -23.640 -1.591 20.111 82.286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.28755 4.07194 13.58 <2e-16 ***
## x 0.93418 0.03902 23.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.2 on 178 degrees of freedom
## Multiple R-squared: 0.763, Adjusted R-squared: 0.7617
## F-statistic: 573.2 on 1 and 178 DF, p-value: < 2.2e-16
lin.1 <- out.lm1$fitted.values
plot.ts(data); title("Estimated Trend by Reg(1)")
lines(ts(lin.1, start = c(2001, 1), end = c(2015, 12), frequency = 12), col = "red")

plot.ts(out.lm1$residuals, main = "Residuals of Reg(1)")

acf2(out.lm1$residuals); title("ACF from Residual of Reg(1)")

out.lm2 <- lm(data ~ 1 + x + x.sq)
summary(out.lm2)
##
## Call:
## lm(formula = data ~ 1 + x + x.sq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.354 -12.255 -1.668 10.980 68.278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 99.940584 4.241498 23.563 < 2e-16 ***
## x -0.537902 0.108198 -4.971 1.56e-06 ***
## x.sq 0.008133 0.000579 14.047 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.76 on 177 degrees of freedom
## Multiple R-squared: 0.8879, Adjusted R-squared: 0.8867
## F-statistic: 701.3 on 2 and 177 DF, p-value: < 2.2e-16
lin.2 <- out.lm2$fitted.values
plot.ts(data); title("Estimated Trend by Reg(2)")
lines(ts(lin.2, start = c(2001, 1), end = c(2015, 12), frequency = 12), col = "red")

plot.ts(out.lm2$residuals, main = "Residuals of Reg(2)")

acf2(out.lm2$residuals); title("ACF from Residual of Reg(2)")

out.lm3 <- lm(data ~ 1 + x + x.sq + x.cu)
summary(out.lm3)
##
## Call:
## lm(formula = data ~ 1 + x + x.sq + x.cu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.060 -12.337 -1.294 11.167 68.135
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.014e+02 5.725e+00 17.716 < 2e-16 ***
## x -6.349e-01 2.732e-01 -2.324 0.02126 *
## x.sq 9.469e-03 3.502e-03 2.704 0.00752 **
## x.cu -4.920e-06 1.272e-05 -0.387 0.69936
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.8 on 176 degrees of freedom
## Multiple R-squared: 0.888, Adjusted R-squared: 0.8861
## F-statistic: 465.3 on 3 and 176 DF, p-value: < 2.2e-16
lin.3 <- out.lm3$fitted.values
plot.ts(data); title("Estimated Trend by Reg(3)")
lines(ts(lin.3, start = c(2001, 1), end = c(2015, 12), frequency = 12), col = "red")

plot.ts(out.lm3$residuals, main = "Residuals of Reg(3)")

acf2(out.lm3$residuals); title("ACF from Residual of Reg(3)")

out.lm4 <- lm(data ~ 1 + x + x.sq + x.cu + x.q)
summary(out.lm4)
##
## Call:
## lm(formula = data ~ 1 + x + x.sq + x.cu + x.q)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.315 -10.047 -1.351 8.706 57.457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.077e+01 6.209e+00 11.398 < 2e-16 ***
## x 2.670e+00 4.726e-01 5.649 6.42e-08 ***
## x.sq -7.216e-02 1.058e-02 -6.823 1.40e-10 ***
## x.cu 6.953e-04 8.767e-05 7.930 2.48e-13 ***
## x.q -1.934e-06 2.403e-07 -8.049 1.23e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.11 on 175 degrees of freedom
## Multiple R-squared: 0.9183, Adjusted R-squared: 0.9164
## F-statistic: 491.7 on 4 and 175 DF, p-value: < 2.2e-16
lin.4 <- out.lm4$fitted.values
plot.ts(data); title("Estimated Trend by Reg(4)")
lines(ts(lin.4, start = c(2001, 1), end = c(2015, 12), frequency = 12), col = "red")

plot.ts(out.lm4$residuals, main = "Residuals of Reg(4)")

acf2(out.lm4$residuals); title("ACF from Residual of Reg(4)")

### Reg(4) is the best to captures the trend in terms of the highest R^2
### Harmonic Regression to Estimate Seasonality
t <- 1:n; f1 <- floor(n/6); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1
cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)
out.hlm1 <- lm(data ~ 1 + cos1 + sin1)
out.hlm2 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2)
out.hlm3 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
out.hlm4 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)
hlms <- list(out.hlm1, out.hlm2, out.hlm3, out.hlm4)
aic.hlms <- numeric(4)
bic.hlms <- numeric(4)
k <- 0
for (hlm in hlms) {
k <- k + 1
aic.hlms[k] <- AIC(hlm)
bic.hlms[k] <- BIC(hlm)
}
y_min <- min(aic.hlms, bic.hlms)
y_max <- max(aic.hlms, bic.hlms)
plot(1:4, aic.hlms, type="b", col="blue", pch=19,
xlab="Harmonic terms (k)", ylab="Criterion Value",
main="AIC and BIC of Harmonic Regression",
ylim=c(y_min, y_max))
lines(1:4, bic.hlms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)

plot.ts(data, main = "Harmonic Reg", ylab = "data")
lines(ts(out.hlm1$fitted.values, start = c(2001, 1), end = c(2015, 12), frequency = 12), col = "red")

sadj.reg <- data - out.hlm1$fitted.values
plot.ts(sadj.reg)

model1 <- sadj.reg - out.lm4$fitted.values
plot.ts(model1); title("Model 1")

out.adf1 <- ur.df(model1, type = "drift", selectlags = "AIC")
summary(out.adf1)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.224 -7.876 0.015 8.882 41.512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -71.02052 11.67371 -6.084 7.20e-09 ***
## z.lag.1 -0.50780 0.08331 -6.096 6.77e-09 ***
## z.diff.lag -0.17782 0.07486 -2.375 0.0186 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.71 on 175 degrees of freedom
## Multiple R-squared: 0.3313, Adjusted R-squared: 0.3237
## F-statistic: 43.35 on 2 and 175 DF, p-value: 5.105e-16
##
##
## Value of test-statistic is: -6.0955 18.5803
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
acf2(model1, lag.max = n/4); title("Correlogram of Model 1")

test(model1)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 223.95 0 *
## McLeod-Li Q Q ~ chisq(20) 232.54 0 *
## Turning points T (T-118.7)/5.6 ~ N(0,1) 121 0.6785
## Diff signs S (S-89.5)/3.9 ~ N(0,1) 94 0.2466
## Rank P (P-8055)/404.2 ~ N(0,1) 7913 0.7253

jarque.bera.test(model1)
##
## Jarque Bera Test
##
## data: model1
## X-squared = 5.9423, df = 2, p-value = 0.05124
## Model 2: Smoothing Based
cda.1 <- classical(data, d = 6, order = 2)
model2.1 <- cda.1$resi
plot.ts(model2.1, ylab = "Residuals", main = "Model 2")

out.adf2.1 <- ur.df(model2.1, type = "drift", selectlags = "AIC")
summary(out.adf2.1)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.104 -5.501 0.217 5.738 27.644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.05927 0.70880 0.084 0.933452
## z.lag.1 -0.18477 0.05215 -3.543 0.000507 ***
## z.diff.lag -0.26484 0.07428 -3.565 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.452 on 175 degrees of freedom
## Multiple R-squared: 0.1888, Adjusted R-squared: 0.1795
## F-statistic: 20.36 on 2 and 175 DF, p-value: 1.12e-08
##
##
## Value of test-statistic is: -3.5433 6.2777
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
acf2(model2.1); title("Correlogram of Model 2")

test(model2.1)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 428.64 0 *
## McLeod-Li Q Q ~ chisq(20) 88.14 0 *
## Turning points T (T-118.7)/5.6 ~ N(0,1) 119 0.9528
## Diff signs S (S-89.5)/3.9 ~ N(0,1) 91 0.6993
## Rank P (P-8055)/404.2 ~ N(0,1) 8282 0.5743

jarque.bera.test(model2.1)
##
## Jarque Bera Test
##
## data: model2.1
## X-squared = 3.1638, df = 2, p-value = 0.2056
cda.2 <- classical(data, d = 12, order = 2)
model2.2 <- cda.2$resi
plot.ts(model2.2, ylab = "Residuals", main = "Model 2")

out.adf2.2 <- ur.df(model2.2, type = "drift", selectlags = "AIC")
summary(out.adf2.2)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.5222 -4.1800 0.1218 4.4284 20.6826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02540 0.58484 0.043 0.965407
## z.lag.1 -0.14608 0.04357 -3.353 0.000981 ***
## z.diff.lag -0.08927 0.07660 -1.165 0.245450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.799 on 175 degrees of freedom
## Multiple R-squared: 0.0866, Adjusted R-squared: 0.07616
## F-statistic: 8.296 on 2 and 175 DF, p-value: 0.0003613
##
##
## Value of test-statistic is: -3.3527 5.6218
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
acf2(model2.2); title("Correlogram of Model 2")

test(model2.2)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 497.45 0 *
## McLeod-Li Q Q ~ chisq(20) 125.5 0 *
## Turning points T (T-118.7)/5.6 ~ N(0,1) 114 0.407
## Diff signs S (S-89.5)/3.9 ~ N(0,1) 90 0.8976
## Rank P (P-8055)/404.2 ~ N(0,1) 8353 0.4609

jarque.bera.test(model2.2)
##
## Jarque Bera Test
##
## data: model2.2
## X-squared = 6.3671, df = 2, p-value = 0.04144
## Model 3: Differencing Based
model3.1 <- diff(data)
model3.2 <- diff(diff(data, 6))
model3.3 <- diff(diff(data, 12))
plot.ts(model3.1, "main" = "Model3.1: Diff(1)", ylab = "diff(1)")

plot.ts(model3.2, ylab = "diff(1)diff(6)"); title("Model 3.2")

plot.ts(model3.3, ylab = "diff(1)diff(12)"); title("Model 3.3")

out.adf3.1 <- ur.df(model3.1, type = "drift", selectlags = "AIC")
summary(out.adf3.1)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.319 -10.654 0.028 9.034 46.369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.30544 1.21740 1.072 0.285
## z.lag.1 -1.51860 0.10864 -13.979 < 2e-16 ***
## z.diff.lag 0.32687 0.07185 4.549 1.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.16 on 174 degrees of freedom
## Multiple R-squared: 0.6158, Adjusted R-squared: 0.6114
## F-statistic: 139.4 on 2 and 174 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.9786 97.713
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
out.adf3.2 <- ur.df(model3.2, type = "drift", selectlags = "AIC")
summary(out.adf3.2)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.623 -8.866 -0.217 9.635 42.951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.37205 1.13369 -0.328 0.74318
## z.lag.1 -1.78477 0.12951 -13.781 < 2e-16 ***
## z.diff.lag 0.20903 0.07534 2.774 0.00616 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.82 on 168 degrees of freedom
## Multiple R-squared: 0.7506, Adjusted R-squared: 0.7477
## F-statistic: 252.9 on 2 and 168 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -13.7807 94.9561
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
out.adf3.3 <- ur.df(model3.3, type = "drift", selectlags = "AIC")
summary(out.adf3.3)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.675 -4.622 0.712 5.975 31.517
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.27643 0.79032 -0.350 0.727
## z.lag.1 -1.48300 0.12831 -11.558 <2e-16 ***
## z.diff.lag 0.09949 0.07794 1.276 0.204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.15 on 162 degrees of freedom
## Multiple R-squared: 0.6778, Adjusted R-squared: 0.6739
## F-statistic: 170.4 on 2 and 162 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -11.5576 66.7887
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
acf2(model3.1, lag.max = n/4)

acf2(model3.2, lag.max = n/4)

acf2(model3.3, lag.max = n/4)

test(model3.1)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 328.29 0 *
## McLeod-Li Q Q ~ chisq(20) 104.69 0 *
## Turning points T (T-118)/5.6 ~ N(0,1) 120 0.7216
## Diff signs S (S-89)/3.9 ~ N(0,1) 95 0.1213
## Rank P (P-7965.5)/400.8 ~ N(0,1) 8122 0.6962

test(model3.2)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 373.49 0 *
## McLeod-Li Q Q ~ chisq(20) 85 0 *
## Turning points T (T-114)/5.5 ~ N(0,1) 135 1e-04 *
## Diff signs S (S-86)/3.8 ~ N(0,1) 86 1
## Rank P (P-7439)/380.9 ~ N(0,1) 7462 0.9518

test(model3.3)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 90.35 0 *
## McLeod-Li Q Q ~ chisq(20) 32.54 0.0379 *
## Turning points T (T-110)/5.4 ~ N(0,1) 119 0.0968
## Diff signs S (S-83)/3.7 ~ N(0,1) 78 0.1814
## Rank P (P-6930.5)/361.3 ~ N(0,1) 6955 0.9459

jarque.bera.test(model3.1)
##
## Jarque Bera Test
##
## data: model3.1
## X-squared = 2.4735, df = 2, p-value = 0.2903
jarque.bera.test(model3.2)
##
## Jarque Bera Test
##
## data: model3.2
## X-squared = 4.4672, df = 2, p-value = 0.1071
jarque.bera.test(model3.3)
##
## Jarque Bera Test
##
## data: model3.3
## X-squared = 36.756, df = 2, p-value = 1.043e-08
## Model 4:
hopt = opt.ma.cv(interval=c(5, floor(n/2)), tol=1e-20, data=diff(data,12), l=1)
hopt$h.ma;
## [1] 5.607365
plot.ts(diff(data,12))
lines(as.vector(time(diff(data,12))), hopt$out.ma, col = "red");
lines(as.vector(time(diff(data,12))),smooth.exp(diff(data,12), 0.4), col = "blue")

model4 <- diff(data,12) - hopt$out.ma
plot.ts(model4)

out.adf4 <- ur.df(model4, type = "drift", selectlags = "AIC")
summary(out.adf4)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.0668 -4.3073 0.0858 4.6536 21.9368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02906 0.62188 -0.047 0.963
## z.lag.1 -0.91378 0.10228 -8.934 8.4e-16 ***
## z.diff.lag 0.06521 0.07812 0.835 0.405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.012 on 163 degrees of freedom
## Multiple R-squared: 0.4332, Adjusted R-squared: 0.4263
## F-statistic: 62.3 on 2 and 163 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -8.9342 39.9208
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.46 -2.88 -2.57
## phi1 6.52 4.63 3.81
acf2(model4, lag.max = length(model4) / 4); title("Correlogram of Model 4")

test(model4)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 76.31 0 *
## McLeod-Li Q Q ~ chisq(20) 31.46 0.0495 *
## Turning points T (T-110.7)/5.4 ~ N(0,1) 104 0.22
## Diff signs S (S-83.5)/3.8 ~ N(0,1) 91 0.0457 *
## Rank P (P-7014)/364.5 ~ N(0,1) 6840 0.6331

jarque.bera.test(model4)
##
## Jarque Bera Test
##
## data: model4
## X-squared = 0.71376, df = 2, p-value = 0.6999