```` # Set initial environment
setwd("/Users/iyeonghan/skku/25-1/Time Series/Exercises")
rm(list = ls(all = TRUE))
df <- read.table(file = "2022practice1.csv", sep = ",")
data <- ts(df$V2, start = c(2005,1), end = c(2018, 12), frequency = 12)
time = as.vector(time(data))
library(TSlibrary)
library(itsmr)
##
## Attaching package: 'itsmr'
## The following objects are masked from 'package:TSlibrary':
##
## airpass, season, smooth.exp, smooth.ma, trend
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'tseries'
## The following object is masked from 'package:itsmr':
##
## arma
library(urca)
library(nortest)
sstest <- function(model) {
# ADF Test
adf <- adf.test(model, alternative = "stationary")
adf_stat <- adf$statistic
adf_pval <- adf$p.value
# KPSS Test
kpss <- kpss.test(model, null = "Level")
kpss_stat <- kpss$statistic
kpss_pval <- kpss$p.value
# PP Test
pp <- pp.test(model)
pp_stat <- pp$statistic
pp_pval <- pp$p.value
sig_star <- function(p) {
if (is.na(p)) return("")
else if (p <= 0.001) return("***")
else if (p <= 0.01) return("**")
else if (p <= 0.05) return("*")
else if (p <= 0.1) return(".")
else return("")
}
result <- data.frame(
Test = c("ADF", "KPSS", "PP"),
`Test Statistic` = round(c(adf_stat, kpss_stat, pp_stat), 4),
`p-value` = signif(c(adf_pval, kpss_pval, pp_pval), 4),
`Signif.` = sapply(c(adf_pval, kpss_pval, pp_pval), sig_star),
`Null Hypothesis` = c(
"Unit root (non-stationary)",
"Stationarity",
"Unit root (non-stationary)"
)
)
return(result)
}
ntests <- function(model) {
jb <- jarque.bera.test(model)
shapiro <- shapiro.test(model)
ad <- ad.test(model)
cvm <- cvm.test(model)
lillie <- lillie.test(model)
sig_star <- function(p) {
if (is.na(p)) return("")
else if (p <= 0.001) return("***")
else if (p <= 0.01) return("**")
else if (p <= 0.05) return("*")
else if (p <= 0.1) return(".")
else return("")
}
result <- data.frame(
Test = c("Jarque-Bera", "Shapiro-Wilk", "Anderson-Darling", "Cramer-von Mises", "Lilliefors"),
`Test Statistic` = round(c(jb$statistic, shapiro$statistic, ad$statistic, cvm$statistic, lillie$statistic), 4),
`p-value` = signif(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), 4),
`Signif.` = sapply(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), sig_star),
`Null Hypothesis` = "Normality"
)
return(result)
}
plot.ts(data, main = "Given Data")
n <- length(data)
acf2(data, lag.max = n / 4); title("Correlogram of Given Data")
# Model 1: Regression Based ### Regression to remove trend
x <- seq(from = 1, to = n, by = 1)
x2 <- x^2
x3 <- x^3
x4 <- x^4
out.lm1 <- lm(data ~ 1 + x)
summary(out.lm1)
##
## Call:
## lm(formula = data ~ 1 + x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.393 -4.889 -1.447 4.421 13.704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.174257 0.930809 41.01 <2e-16 ***
## x 0.164442 0.009554 17.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.005 on 166 degrees of freedom
## Multiple R-squared: 0.6409, Adjusted R-squared: 0.6387
## F-statistic: 296.3 on 1 and 166 DF, p-value: < 2.2e-16
out.lm2 <- lm(data ~ 1 + x + x2)
summary(out.lm2)
##
## Call:
## lm(formula = data ~ 1 + x + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.354 -4.886 -1.320 4.532 13.378
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.2749377 1.3971038 25.964 < 2e-16 ***
## x 0.2314765 0.0381691 6.065 8.74e-09 ***
## x2 -0.0003967 0.0002188 -1.813 0.0716 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.964 on 165 degrees of freedom
## Multiple R-squared: 0.6479, Adjusted R-squared: 0.6436
## F-statistic: 151.8 on 2 and 165 DF, p-value: < 2.2e-16
out.lm3 <- lm(data ~ 1 + x + x2 + x3)
summary(out.lm3)
##
## Call:
## lm(formula = data ~ 1 + x + x2 + x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.692 -4.847 -1.320 4.372 12.313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.877e+01 1.866e+00 20.777 <2e-16 ***
## x 5.718e-02 9.533e-02 0.600 0.5495
## x2 2.174e-03 1.309e-03 1.661 0.0986 .
## x3 -1.014e-05 5.091e-06 -1.992 0.0480 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.912 on 164 degrees of freedom
## Multiple R-squared: 0.6562, Adjusted R-squared: 0.6499
## F-statistic: 104.4 on 3 and 164 DF, p-value: < 2.2e-16
out.lm4 <- lm(data ~ 1 + x + x2 + x3 + x4)
summary(out.lm4)
##
## Call:
## lm(formula = data ~ 1 + x + x2 + x3 + x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.585 -4.608 -1.433 4.425 12.105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.789e+01 2.369e+00 15.992 <2e-16 ***
## x 1.587e-01 1.930e-01 0.822 0.412
## x2 -5.108e-04 4.625e-03 -0.110 0.912
## x3 1.452e-05 4.106e-05 0.354 0.724
## x4 -7.297e-08 1.205e-07 -0.605 0.546
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.923 on 163 degrees of freedom
## Multiple R-squared: 0.657, Adjusted R-squared: 0.6486
## F-statistic: 78.05 on 4 and 163 DF, p-value: < 2.2e-16
plot.ts(data, main = "Regression with Order 1")
lines(time, out.lm1$fitted.values, col = "red")
plot.ts(data, main = "Regression with Order 2")
lines(time, out.lm2$fitted.values, col = "red")
plot.ts(data, main = "Regression with Order 3")
lines(time, out.lm3$fitted.values, col = "red")
plot.ts(data, main = "Regression with Order 4")
lines(time, out.lm4$fitted.values, col = "red")
par(mfrow=c(2,2))
plot(out.lm1)
plot(out.lm2)
plot(out.lm3)
plot(out.lm4)
par(mfrow=c(1,1))
lms <- list(out.lm1, out.lm2, out.lm3, out.lm4)
aic.lms <- numeric(4)
bic.lms <- numeric(4)
d <- 0
for (lm in lms) {
d <- d + 1
aic.lms[d] <- AIC(lm)
bic.lms[d] <- BIC(lm)
}
y_min <- min(aic.lms, bic.lms)
y_max <- max(aic.lms, bic.lms)
plot(1:4, aic.lms, type="b", col="blue", pch=19,
xlab="Regression Order", ylab="Criterion Value",
main="AIC and BIC of Regression",
ylim=c(y_min, y_max))
lines(1:4, bic.lms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)
d.aic <- aic.lms - min(aic.lms)
d.bic <- bic.lms - min(bic.lms)
y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)
plot(1:4, d.aic, type="b", col="blue", pch=19,
xlab="Regression Order", ylab="Difference from Minimum",
main="Difference of AIC and BIC",
ylim=c(y_min, y_max))
lines(1:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)
acf2(out.lm2$residuals, lag.max = n/4); title("Correlogram after Removing Trend by Reg 1")
### Harmonic Regression
t <- 1:n; f1 <- floor(n/12); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1
cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)
out.hlm1 <- lm(data ~ 1 + cos1 + sin1)
out.hlm2 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2)
out.hlm3 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
out.hlm4 <- lm(data ~ 1 + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)
hlms <- list(out.hlm1, out.hlm2, out.hlm3, out.hlm4)
plot.ts(data - out.lm1$fitted.values, main = "Harmonic Regression Results", ylab = "Fitted Values")
lines(time, out.hlm1$fitted.values - mean(out.hlm1$fitted.values), col = "red")
lines(time, out.hlm2$fitted.values - mean(out.hlm2$fitted.values), col = "blue")
#lines(time, out.hlm3$fitted.values - mean(out.hlm3$fitted.values), col = "blue")
#lines(time, out.hlm2$fitted.values - mean(out.hlm2$fitted.values), col = "purple")
legend("topright", legend=c("k = 1", "k = 2"), col=c("red", "blue"), pch=19, lty=1)
#harmonic regression model selection by Information Criteria
aic.hlms <- numeric(4)
bic.hlms <- numeric(4)
k <- 0
for (hlm in hlms) {
k <- k + 1
aic.hlms[k] <- AIC(hlm)
bic.hlms[k] <- BIC(hlm)
}
y_min <- min(aic.hlms, bic.hlms)
y_max <- max(aic.hlms, bic.hlms)
plot(1:4, aic.hlms, type="b", col="blue", pch=19,
xlab="Harmonic terms (k)", ylab="Criterion Value",
main="AIC and BIC of Harmonic Regression",
ylim=c(y_min, y_max))
lines(1:4, bic.hlms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)
d.aic <- aic.hlms - min(aic.hlms)
d.bic <- bic.hlms - min(bic.hlms)
y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)
plot(1:4, d.aic, type="b", col="blue", pch=19,
xlab="Harmonic terms (k)", ylab="Difference from Minimum",
main="Difference of AIC and BIC",
ylim=c(y_min, y_max))
lines(1:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)
model1 <- data - out.lm1$fitted.values - (out.hlm1$fitted.values - mean(out.hlm1$fitted.values))
plot.ts(model1, main = "Regression Based Model")
sstest(model1)
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -2.3610 0.42560
## KPSS Level KPSS 0.3531 0.09739 .
## Dickey-Fuller Z(alpha) PP -127.7105 0.01000 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
acf2(model1, lag.max = n/4); title("Correlogram of Reg Based")
test(model1)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 236.38 0 *
## McLeod-Li Q Q ~ chisq(20) 51.81 1e-04 *
## Turning points T (T-110.7)/5.4 ~ N(0,1) 117 0.2439
## Diff signs S (S-83.5)/3.8 ~ N(0,1) 70 3e-04 *
## Rank P (P-7014)/364.5 ~ N(0,1) 7338 0.3741
#Normality Tests
ntests(model1)
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 1.1123 0.5734 Normality
## 2 Shapiro-Wilk 0.9946 0.8019 Normality
## 3 Anderson-Darling 0.2011 0.8798 Normality
## 4 Cramer-von Mises 0.0292 0.8567 Normality
## 5 Lilliefors 0.0315 0.9511 Normality
cd <- classical(data, d = 12, order = 1)
out = classical(data, d=12, order=1);
par(mfrow=c(2,2))
plot.ts(data);
title("step1")
lines(time, cd$m1, col="red");
plot.ts(data-cd$m1);
title("step2")
lines(time, cd$st, col="red");
plot.ts(data-cd$st);
title("step3")
lines(time, cd$m, col="red");
plot.ts(data);
lines(time, cd$fit, col="red")
title("Final")
model2 <- cd$resi
par(mfrow = c(1,1))
plot.ts(model2, main = "Smoothing Based")
acf2(model2, lag.max = n/4); title("Correlogram of Model 2")
sstest(model2)
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -2.5195 0.35940
## KPSS Level KPSS 0.3233 0.10000 .
## Dickey-Fuller Z(alpha) PP -24.1594 0.02422 *
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(model2)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 643.14 0 *
## McLeod-Li Q Q ~ chisq(20) 365.57 0 *
## Turning points T (T-110.7)/5.4 ~ N(0,1) 98 0.0198 *
## Diff signs S (S-83.5)/3.8 ~ N(0,1) 79 0.2305
## Rank P (P-7014)/364.5 ~ N(0,1) 7058 0.9039
# Normality Test
ntests(model2)
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 2.6773 0.2622000 Normality
## 2 Shapiro-Wilk 0.9790 0.0119800 * Normality
## 3 Anderson-Darling 1.3343 0.0017910 ** Normality
## 4 Cramer-von Mises 0.2156 0.0033680 ** Normality
## 5 Lilliefors 0.0992 0.0003618 *** Normality
sd <- diff(data, 12)
plot.ts(sd, main = "Seasonal Differenced")
acf2(sd, lag.max = n/4); title("Correlogram after Seasonal Diff")
model3 <- diff(sd)
plot.ts(model3, main = "Diff Based")
sstest(model3)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -5.3156 0.01 **
## KPSS Level KPSS 0.0369 0.10 .
## Dickey-Fuller Z(alpha) PP -180.6863 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(model3)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 77.33 0 *
## McLeod-Li Q Q ~ chisq(20) 23.24 0.2772
## Turning points T (T-102)/5.2 ~ N(0,1) 105 0.5654
## Diff signs S (S-77)/3.6 ~ N(0,1) 73 0.2673
## Rank P (P-5967.5)/323.2 ~ N(0,1) 5897 0.8273
#Normality tests
ntests(model3)
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 5.4396 0.06589 . Normality
## 2 Shapiro-Wilk 0.9838 0.06587 . Normality
## 3 Anderson-Darling 0.5067 0.19810 Normality
## 4 Cramer-von Mises 0.0594 0.38270 Normality
## 5 Lilliefors 0.0404 0.77670 Normality
Transform to Rmd
library(knitr)
knitr::spin("2022 MIDTERM.R", knit = FALSE)
## [1] "2022 MIDTERM.Rmd"