setwd("/Users/iyeonghan/skku/25-1/Time Series/Final/실습")
rm(list = ls(all = TRUE))
library(TSlibrary)
library(itsmr)
##
## Attaching package: 'itsmr'
## The following objects are masked from 'package:TSlibrary':
##
## airpass, season, smooth.exp, smooth.ma, trend
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'tseries'
## The following object is masked from 'package:itsmr':
##
## arma
library(urca)
library(nortest)
library(forecast)
##
## Attaching package: 'forecast'
## The following object is masked from 'package:itsmr':
##
## forecast
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:itsmr':
##
## deaths
sstest <- function(model) {
# ADF Test
adf <- adf.test(model, alternative = "stationary")
adf_stat <- adf$statistic
adf_pval <- adf$p.value
# KPSS Test
kpss <- kpss.test(model, null = "Level")
kpss_stat <- kpss$statistic
kpss_pval <- kpss$p.value
# PP Test
pp <- pp.test(model)
pp_stat <- pp$statistic
pp_pval <- pp$p.value
sig_star <- function(p) {
if (is.na(p)) return("")
else if (p <= 0.001) return("***")
else if (p <= 0.01) return("**")
else if (p <= 0.05) return("*")
else if (p <= 0.1) return(".")
else return("")
}
result <- data.frame(
Test = c("ADF", "KPSS", "PP"),
`Test Statistic` = round(c(adf_stat, kpss_stat, pp_stat), 4),
`p-value` = signif(c(adf_pval, kpss_pval, pp_pval), 4),
`Signif.` = sapply(c(adf_pval, kpss_pval, pp_pval), sig_star),
`Null Hypothesis` = c(
"Unit root (non-stationary)",
"Stationarity",
"Unit root (non-stationary)"
)
)
return(result)
}
ntests <- function(model) {
jb <- jarque.bera.test(model)
shapiro <- shapiro.test(model)
ad <- ad.test(model)
cvm <- cvm.test(model)
lillie <- lillie.test(model)
sig_star <- function(p) {
if (is.na(p)) return("")
else if (p <= 0.001) return("***")
else if (p <= 0.01) return("**")
else if (p <= 0.05) return("*")
else if (p <= 0.1) return(".")
else return("")
}
result <- data.frame(
Test = c("Jarque-Bera", "Shapiro-Wilk", "Anderson-Darling", "Cramer-von Mises", "Lilliefors"),
`Test Statistic` = round(c(jb$statistic, shapiro$statistic, ad$statistic, cvm$statistic, lillie$statistic), 4),
`p-value` = signif(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), 4),
`Signif.` = sapply(c(jb$p.value, shapiro$p.value, ad$p.value, cvm$p.value, lillie$p.value), sig_star),
`Null Hypothesis` = "Normality"
)
return(result)
}
data <- read.csv("2024practice2.csv", header = FALSE)
data <- ts(data, start = c(1924,01), end = c(2023, 12), frequency = 12)
time <- as.vector(time(data))
n <- length(data)
plot.ts(data, main = "Given Data")
acf2(data, lag.max = 50); title("Correlogram")
### (b) constant variance but linear trend exists, so the data is not
stationary. Also, the seasonality with period 12 is included ###(c)
x <- seq(from = 1, to = n, by = 1)
reg1 <- lm(data ~ 1 + x)
resid1 <- reg1$residuals
t <- 1:n; f1 <- floor(n/12); f2 <- 2*f1; f3 <- 3*f1; f4 <- 4*f1; f5 <- 5*f1; f6 <- 6*f1
cos1 <- cos(f1*2*pi/n*t); sin1 <- sin(f1*2*pi/n*t)
cos2 <- cos(f2*2*pi/n*t); sin2 <- sin(f2*2*pi/n*t)
cos3 <- cos(f3*2*pi/n*t); sin3 <- sin(f3*2*pi/n*t)
cos4 <- cos(f4*2*pi/n*t); sin4 <- sin(f4*2*pi/n*t)
cos5 <- cos(f5*2*pi/n*t); sin5 <- sin(f5*2*pi/n*t)
cos6 <- cos(f6*2*pi/n*t); sin6 <- sin(f6*2*pi/n*t)
reg2 <- lm(data ~ 1 + x + cos1 + sin1)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg2$fitted.values, col = "red")
resid2 <- reg2$residuals
reg3 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg3$fitted.values, col = "red")
resid3 <- reg3$residuals
reg4 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + sin3)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg4$fitted.values, col = "red")
resid4 <- reg4$residuals
reg5 <- lm(data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 + sin3 + cos4 + sin4)
par(mfrow = c(1,1))
plot.ts(data, main = "Given Data")
lines(time, reg5$fitted.values, col = "red")
resid5 <- reg5$residuals
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid1, main = "residual from reg1")
acf2(resid1, lag.max = 100); title("ACF")
Pacf(resid1, lag=n/4, main = "PACF")
sstest(resid1)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -9.8630 0.01 **
## KPSS Level KPSS 0.0049 0.10 .
## Dickey-Fuller Z(alpha) PP -223.9978 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid1)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 11318.8 0 *
## McLeod-Li Q Q ~ chisq(20) 7621.96 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 217 0 *
## Diff signs S (S-599.5)/10 ~ N(0,1) 664 0 *
## Rank P (P-359700)/6932.5 ~ N(0,1) 359314 0.9556
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid2, main = "residual from reg2")
acf2(resid2, lag.max = 100); title("ACF")
Pacf(resid2, lag=n/4, main = "PACF")
sstest(resid2)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -10.0202 0.01 **
## KPSS Level KPSS 0.0905 0.10 .
## Dickey-Fuller Z(alpha) PP -808.2333 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid2)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 454.06 0 *
## McLeod-Li Q Q ~ chisq(20) 49.64 2e-04 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 691 0 *
## Diff signs S (S-599.5)/10 ~ N(0,1) 588 0.2503
## Rank P (P-359700)/6932.5 ~ N(0,1) 358805 0.8973
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid3, main = "residual from reg3")
acf2(resid3, lag.max = 100); title("ACF")
Pacf(resid3, lag=n/4, main = "PACF")
sstest(resid3)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -9.8230 0.01 **
## KPSS Level KPSS 0.0963 0.10 .
## Dickey-Fuller Z(alpha) PP -1005.1273 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid3)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 130.93 0 *
## McLeod-Li Q Q ~ chisq(20) 46.59 7e-04 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 731 0 *
## Diff signs S (S-599.5)/10 ~ N(0,1) 595 0.6528
## Rank P (P-359700)/6932.5 ~ N(0,1) 360625 0.8939
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid4, main = "residual from reg4")
acf2(resid4, lag.max = 100); title("ACF")
Pacf(resid4, lag=n/4, main = "PACF")
sstest(resid4)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -9.5873 0.01 **
## KPSS Level KPSS 0.0955 0.10 .
## Dickey-Fuller Z(alpha) PP -1019.0243 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid4)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 95.9 0 *
## McLeod-Li Q Q ~ chisq(20) 62.43 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 763 0.0145 *
## Diff signs S (S-599.5)/10 ~ N(0,1) 598 0.8808
## Rank P (P-359700)/6932.5 ~ N(0,1) 359848 0.983
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot.ts(resid5, main = "residual from reg5")
acf2(resid5, lag.max = 100); title("ACF")
Pacf(resid5, lag=100, main = "PACF")
sstest(resid5)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -9.4400 0.01 **
## KPSS Level KPSS 0.0958 0.10 .
## Dickey-Fuller Z(alpha) PP -987.9755 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
test(resid5)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 101.36 0 *
## McLeod-Li Q Q ~ chisq(20) 69.18 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 773 0.0786
## Diff signs S (S-599.5)/10 ~ N(0,1) 605 0.5825
## Rank P (P-359700)/6932.5 ~ N(0,1) 360963 0.8554
summary(reg4)
##
## Call:
## lm(formula = data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 +
## sin3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.16080 -0.18999 0.00499 0.18408 1.11551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.392e+01 1.711e-02 -813.735 < 2e-16 ***
## x 2.804e-02 2.467e-05 1136.446 < 2e-16 ***
## cos1 -2.367e+00 1.209e-02 -195.805 < 2e-16 ***
## sin1 -1.839e+00 1.209e-02 -152.153 < 2e-16 ***
## cos2 -2.086e-01 1.209e-02 -17.259 < 2e-16 ***
## sin2 -4.204e-02 1.209e-02 -3.478 0.000523 ***
## cos3 1.080e-01 1.209e-02 8.937 < 2e-16 ***
## sin3 -6.197e-02 1.209e-02 -5.127 3.44e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2961 on 1192 degrees of freedom
## Multiple R-squared: 0.9991, Adjusted R-squared: 0.9991
## F-statistic: 1.936e+05 on 7 and 1192 DF, p-value: < 2.2e-16
summary(reg5)
##
## Call:
## lm(formula = data ~ 1 + x + cos1 + sin1 + cos2 + sin2 + cos3 +
## sin3 + cos4 + sin4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15401 -0.18569 0.00561 0.17677 1.12239
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.392e+01 1.689e-02 -824.030 < 2e-16 ***
## x 2.804e-02 2.437e-05 1150.821 < 2e-16 ***
## cos1 -2.367e+00 1.194e-02 -198.281 < 2e-16 ***
## sin1 -1.839e+00 1.194e-02 -154.077 < 2e-16 ***
## cos2 -2.086e-01 1.194e-02 -17.477 < 2e-16 ***
## sin2 -4.204e-02 1.194e-02 -3.522 0.000444 ***
## cos3 1.080e-01 1.194e-02 9.050 < 2e-16 ***
## sin3 -6.197e-02 1.194e-02 -5.191 2.45e-07 ***
## cos4 -5.508e-02 1.194e-02 -4.615 4.36e-06 ***
## sin4 -3.967e-02 1.194e-02 -3.323 0.000917 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2924 on 1190 degrees of freedom
## Multiple R-squared: 0.9991, Adjusted R-squared: 0.9991
## F-statistic: 1.544e+05 on 9 and 1190 DF, p-value: < 2.2e-16
lms <- list(reg1, reg2, reg3, reg4, reg5)
aic.lms <- numeric(5)
bic.lms <- numeric(5)
d <- 0
for (lm in lms) {
d <- d + 1
aic.lms[d] <- AIC(lm)
bic.lms[d] <- BIC(lm)
}
y_min <- min(aic.lms, bic.lms)
y_max <- max(aic.lms, bic.lms)
plot(0:4, aic.lms, type="b", col="blue", pch=19,
xlab="Harmonic term order", ylab="Criterion Value",
main="AIC and BIC of Regression",
ylim=c(y_min, y_max))
lines(0:4, bic.lms, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)
d.aic <- aic.lms - min(aic.lms)
d.bic <- bic.lms - min(bic.lms)
y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)
plot(0:4, d.aic, type="b", col="blue", pch=19,
xlab="Harmonic term order", ylab="Difference from Minimum",
main="Difference of AIC and BIC",
ylim=c(y_min, y_max))
lines(0:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)
### In terms of both AIC and BIC, the regression with linear trend and
4th order harmonic series is the best. ### From SACF and SPACF of the
residuals, error~ARMA(1,3)
X <- cbind(x, cos1, sin1, cos2, sin2, cos3, sin3, cos4, sin4)
arma0 <- Arima(data, order = c(1,0,0), xreg = X, include.mean = TRUE)
arma0
## Series: data
## Regression with ARIMA(1,0,0) errors
##
## Coefficients:
## ar1 intercept x cos1 sin1 cos2 sin2 cos3
## 0.2317 -13.9192 0.028 -2.3667 -1.8391 -0.2087 -0.0420 0.1080
## s.e. 0.0281 0.0213 0.000 0.0143 0.0143 0.0128 0.0128 0.0113
## sin3 cos4 sin4
## -0.0620 -0.0551 -0.0397
## s.e. 0.0113 0.0102 0.0102
##
## sigma^2 = 0.08096: log likelihood = -188.95
## AIC=401.9 AICc=402.16 BIC=462.98
arma1 <- Arima(data, order = c(1,0,1), xreg = X, include.mean = TRUE)
arma1
## Series: data
## Regression with ARIMA(1,0,1) errors
##
## Coefficients:
## ar1 ma1 intercept x cos1 sin1 cos2 sin2
## 0.4929 -0.2775 -13.9195 0.028 -2.3668 -1.8391 -0.2086 -0.0420
## s.e. 0.1007 0.1113 0.0232 0.000 0.0143 0.0143 0.0119 0.0119
## cos3 sin3 cos4 sin4
## 0.1080 -0.0619 -0.0552 -0.0396
## s.e. 0.0107 0.0107 0.0102 0.0102
##
## sigma^2 = 0.08068: log likelihood = -186.35
## AIC=398.71 AICc=399.01 BIC=464.88
arma2 <- Arima(data, order = c(1,0,2), xreg = X, include.mean = TRUE)
arma2
## Series: data
## Regression with ARIMA(1,0,2) errors
##
## Coefficients:
## ar1 ma1 ma2 intercept x cos1 sin1 cos2
## 0.5159 -0.300 -0.0078 -13.9195 0.028 -2.3667 -1.8390 -0.2086
## s.e. 0.2030 0.204 0.0619 0.0233 0.000 0.0142 0.0142 0.0119
## sin2 cos3 sin3 cos4 sin4
## -0.0419 0.1080 -0.0619 -0.0551 -0.0397
## s.e. 0.0119 0.0108 0.0108 0.0102 0.0102
##
## sigma^2 = 0.08074: log likelihood = -186.34
## AIC=400.69 AICc=401.04 BIC=471.95
arma3 <- Arima(data, order = c(1,0,3), xreg = X, include.mean = TRUE)
arma3
## Series: data
## Regression with ARIMA(1,0,3) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 intercept x cos1 sin1
## -0.454 0.6744 0.1991 0.1199 -13.9195 0.028 -2.3668 -1.8391
## s.e. 0.222 0.2208 0.0580 0.0294 0.0223 0.000 0.0147 0.0147
## cos2 sin2 cos3 sin3 cos4 sin4
## -0.2086 -0.0419 0.1079 -0.0620 -0.0551 -0.0397
## s.e. 0.0121 0.0121 0.0102 0.0102 0.0106 0.0106
##
## sigma^2 = 0.08045: log likelihood = -183.67
## AIC=397.34 AICc=397.75 BIC=473.69
armas <- list(arma0, arma1, arma2, arma3)
aicc.armas <- numeric(4)
bic.armas <- numeric(4)
k <- 0
for (mod in armas) {
k <- k + 1
aicc.armas[k] <- mod$aicc
bic.armas[k] <- mod$bic
}
y_min <- min(aicc.armas, bic.armas)
y_max <- max(aicc.armas, bic.armas)
par(mfrow = c(1,2))
plot(0:3, aicc.armas, type="b", col="blue", pch=19,
xlab="q", ylab="Criterion Value",
main="AICc and BIC of ARMA(1,q)",
ylim=c(y_min, y_max))
lines(0:3, bic.armas, type="b", col="red", pch=19)
legend("topright", legend=c("AICc", "BIC"), col=c("blue", "red"), pch=19, lty=1)
d.aicc <- aicc.armas - min(aicc.armas)
d.bic <- bic.armas - min(bic.armas)
y_min <- min(d.aicc, d.bic)
y_max <- max(d.aicc, d.bic)
plot(0:3, d.aicc, type="b", col="blue", pch=19,
xlab="q", ylab="Difference from Minimum",
main="Difference of AICc and BIC",
ylim=c(y_min, y_max))
lines(0:3, d.bic, type="b", col="red", pch=19)
abline(h = 2)
legend("topright", legend=c("AICc - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)
test(residuals(arma0))
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 23.17 0.2804
## McLeod-Li Q Q ~ chisq(20) 70.67 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 805 0.6643
## Diff signs S (S-599.5)/10 ~ N(0,1) 613 0.1772
## Rank P (P-359700)/6932.5 ~ N(0,1) 359292 0.9531
ntests(residuals(arma0))
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 32.6802 8.009e-08 *** Normality
## 2 Shapiro-Wilk 0.9944 1.914e-04 *** Normality
## 3 Anderson-Darling 1.4917 7.571e-04 *** Normality
## 4 Cramer-von Mises 0.2418 1.627e-03 ** Normality
## 5 Lilliefors 0.0287 2.168e-02 * Normality
test(residuals(arma1))
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 17 0.6527
## McLeod-Li Q Q ~ chisq(20) 71.49 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 797 0.9091
## Diff signs S (S-599.5)/10 ~ N(0,1) 609 0.3423
## Rank P (P-359700)/6932.5 ~ N(0,1) 359294 0.9533
ntests(residuals(arma1))
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 31.4349 1.493e-07 *** Normality
## 2 Shapiro-Wilk 0.9946 2.798e-04 *** Normality
## 3 Anderson-Darling 1.4410 1.008e-03 ** Normality
## 4 Cramer-von Mises 0.2286 2.358e-03 ** Normality
## 5 Lilliefors 0.0264 4.857e-02 * Normality
test(residuals(arma2))
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 17.01 0.6522
## McLeod-Li Q Q ~ chisq(20) 71.58 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 797 0.9091
## Diff signs S (S-599.5)/10 ~ N(0,1) 609 0.3423
## Rank P (P-359700)/6932.5 ~ N(0,1) 359231 0.9461
ntests(residuals(arma2))
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 31.5165 1.433e-07 *** Normality
## 2 Shapiro-Wilk 0.9946 2.757e-04 *** Normality
## 3 Anderson-Darling 1.4427 9.988e-04 *** Normality
## 4 Cramer-von Mises 0.2293 2.315e-03 ** Normality
## 5 Lilliefors 0.0265 4.679e-02 * Normality
test(residuals(arma3))
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 12.38 0.9022
## McLeod-Li Q Q ~ chisq(20) 67.94 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 791 0.5994
## Diff signs S (S-599.5)/10 ~ N(0,1) 604 0.6528
## Rank P (P-359700)/6932.5 ~ N(0,1) 359560 0.9839
ntests(residuals(arma3))
## Test Test.Statistic p.value Signif. Null.Hypothesis
## 1 Jarque-Bera 31.8217 1.230e-07 *** Normality
## 2 Shapiro-Wilk 0.9945 2.300e-04 *** Normality
## 3 Anderson-Darling 1.4396 1.016e-03 ** Normality
## 4 Cramer-von Mises 0.2200 3.019e-03 ** Normality
## 5 Lilliefors 0.0282 2.588e-02 * Normality
d1.data <- diff(data)
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(d1.data, main = "d = 1")
acf2(d1.data, lag.max = 100); title("ACF")
Pacf(d1.data, lag.max = 100)
D1.data <- diff(data,12)
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(D1.data, main = "D = 1")
acf2(D1.data, lag.max = 100); title("ACF")
Pacf(D1.data, lag.max = 100)
dD1.data <- diff(diff(data,12))
layout(matrix(c(1,1,2,3),2,2, byrow = TRUE))
plot.ts(dD1.data, main = "d = 1, D = 1")
acf2(dD1.data, lag.max = 100); title("ACF")
Pacf(dD1.data, lag.max = 100)
sstest(d1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -39.4140 0.01 **
## KPSS Level KPSS 0.0063 0.10 .
## Dickey-Fuller Z(alpha) PP -259.0648 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
sstest(D1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -10.8794 0.01 **
## KPSS Level KPSS 0.0103 0.10 .
## Dickey-Fuller Z(alpha) PP -1013.5712 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
sstest(dD1.data)
## Warning in adf.test(model, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(model, null = "Level"): p-value greater than printed
## p-value
## Warning in pp.test(model): p-value smaller than printed p-value
## Test Test.Statistic p.value Signif.
## Dickey-Fuller ADF -10.2797 0.01 **
## KPSS Level KPSS 0.0075 0.10 .
## Dickey-Fuller Z(alpha) PP -1334.3421 0.01 **
## Null.Hypothesis
## Dickey-Fuller Unit root (non-stationary)
## KPSS Level Stationarity
## Dickey-Fuller Z(alpha) Unit root (non-stationary)
par(mfrow = c(1,1))
acf2(D1.data, lag.max = n/4); title("ACF")
Pacf(D1.data, lag.max = n/4)
init.sarima <- auto.arima(data, d=0, D=1, max.q = 3, max.Q = 1, max.p = 1, max.P = 8, stepwise = FALSE, approximation = FALSE)
sarima1 <- arima(data, order = c(1,0,3), seasonal = list(order = c(2,1,1)))
sarima2 <- arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(2, 1, 0)))
sarima4 <- arima(x = data, order = c(4, 1, 1), seasonal = list(order = c(5, 1, 1)))
sarima7 <- arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(4, 1, 0)))
test(init.sarima$residuals)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 33.03 0.0335 *
## McLeod-Li Q Q ~ chisq(20) 80.72 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 837 0.0086 *
## Diff signs S (S-599.5)/10 ~ N(0,1) 603 0.7264
## Rank P (P-359700)/6932.5 ~ N(0,1) 366088 0.3568
test(sarima1$residuals)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 20.38 0.4346
## McLeod-Li Q Q ~ chisq(20) 75.76 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 803 0.7665
## Diff signs S (S-599.5)/10 ~ N(0,1) 615 0.1213
## Rank P (P-359700)/6932.5 ~ N(0,1) 359657 0.9951
test(sarima2$residuals)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 31.58 0.048 *
## McLeod-Li Q Q ~ chisq(20) 66.96 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 807 0.568
## Diff signs S (S-599.5)/10 ~ N(0,1) 611 0.2503
## Rank P (P-359700)/6932.5 ~ N(0,1) 357897 0.7948
test(sarima4$residuals)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 12.93 0.8802
## McLeod-Li Q Q ~ chisq(20) 77.18 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 791 0.5994
## Diff signs S (S-599.5)/10 ~ N(0,1) 614 0.1472
## Rank P (P-359700)/6932.5 ~ N(0,1) 361764 0.7659
test(sarima7$residuals)
## Null hypothesis: Residuals are iid noise.
## Test Distribution Statistic p-value
## Ljung-Box Q Q ~ chisq(20) 24.13 0.2366
## McLeod-Li Q Q ~ chisq(20) 79.8 0 *
## Turning points T (T-798.7)/14.6 ~ N(0,1) 809 0.4789
## Diff signs S (S-599.5)/10 ~ N(0,1) 611 0.2503
## Rank P (P-359700)/6932.5 ~ N(0,1) 358472 0.8594
sarimas <- list(init.sarima, sarima1, sarima2, sarima4, sarima7)
aic.sarimas <- numeric(5)
bic.sarimas <- numeric(5)
k <- 0
for (mod in sarimas) {
k <- k + 1
aic.sarimas[k] <- AIC(mod)
bic.sarimas[k] <- BIC(mod)
}
y_min <- min(aic.sarimas, bic.sarimas)
y_max <- max(aic.sarimas, bic.sarimas)
par(mfrow = c(1,2))
plot(0:4, aic.sarimas, type="b", col="blue", pch=19,
xlab="p", ylab="Criterion Value",
main="AIC and BIC of SARIMA(p,1,1)(0,1,1)",
ylim=c(y_min, y_max))
lines(0:4, bic.sarimas, type="b", col="red", pch=19)
legend("topright", legend=c("AIC", "BIC"), col=c("blue", "red"), pch=19, lty=1)
d.aic <- aic.sarimas - min(aic.sarimas)
d.bic <- bic.sarimas - min(bic.sarimas)
y_min <- min(d.aic, d.bic)
y_max <- max(d.aic, d.bic)
plot(0:4, d.aic, type="b", col="blue", pch=19,
xlab="p", ylab="Difference from Minimum",
main="Difference of AIC and BIC",
ylim=c(y_min, y_max))
lines(0:4, d.bic, type="b", col="red", pch=19)
abline(h = 2) #if the value does not exceed 2, the model show almost same performance.
legend("topright", legend=c("AIC - min(AIC)", "BIC - min(BIC)"), col=c("blue", "red"), pch=19, lty=1)
sarima1
##
## Call:
## arima(x = data, order = c(1, 0, 3), seasonal = list(order = c(2, 1, 1)))
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1 sar2 sma1
## 1 -0.7871 -0.1209 -0.0784 -0.0034 -0.0394 -0.9695
## s.e. 0 0.0290 0.0410 0.0303 0.0308 0.0305 0.0138
##
## sigma^2 estimated as 0.08244: log likelihood = -224.52, aic = 465.04
par(mfrow = c(1,1))
fc.sarima <- forecast(sarima1, h = 4)
plot(fc.sarima, xlim = c(2020,2025))
h <- 4
newx <- (n+1):(n+h)
ncos1 <- cos(f1*2*pi/n*newx); nsin1 <- sin(f1*2*pi/n*newx)
ncos2 <- cos(f2*2*pi/n*newx); nsin2 <- sin(f2*2*pi/n*newx)
ncos3 <- cos(f3*2*pi/n*newx); nsin3 <- sin(f3*2*pi/n*newx)
ncos4 <- cos(f4*2*pi/n*newx); nsin4 <- sin(f4*2*pi/n*newx)
fc.reg <- forecast(arma1, h = h, xreg = cbind(x = newx, cos1 = ncos1, sin1 = nsin1, cos2 = ncos2, sin2 = nsin2, cos3 = ncos3, sin3 = nsin3, cos4 = ncos4, sin4 = nsin4))
plot(fc.reg, xlim = c(2020,2025))
lines(time, reg1$fitted.values, col = "red")
library(knitr)
knitr::spin("2024final.R", knit = FALSE)
## [1] "2024final.Rmd"
`````