# load necessary packages
library(Quandl)
## Warning: package 'Quandl' was built under R version 3.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 3.3.3
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 3.3.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
## Loading required package: PerformanceAnalytics
## Warning: package 'PerformanceAnalytics' was built under R version 3.3.3
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Warning: package 'quantmod' was built under R version 3.3.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.3.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 3.3.3
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'ggplot2' was built under R version 3.3.3
## Warning: package 'tibble' was built under R version 3.3.3
## Warning: package 'tidyr' was built under R version 3.3.3
## Warning: package 'readr' was built under R version 3.3.3
## Warning: package 'purrr' was built under R version 3.3.3
## Warning: package 'dplyr' was built under R version 3.3.3
## Warning: package 'stringr' was built under R version 3.3.3
## Warning: package 'forcats' was built under R version 3.3.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks xts::first()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks xts::last()
## x lubridate::setdiff() masks base::setdiff()
## x lubridate::union() masks base::union()
##
## Attaching package: 'tidyquant'
## The following object is masked from 'package:dplyr':
##
## as_tibble
## The following object is masked from 'package:tibble':
##
## as_tibble
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
library(readr)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.3.3
library(zoo)
library(timetk)
## Warning: package 'timetk' was built under R version 3.3.3
library(tibbletime)
## Warning: package 'tibbletime' was built under R version 3.3.3
##
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
##
## filter
library(lubridate)
library(forecast)
library(broom)
## Warning: package 'broom' was built under R version 3.3.3
library(sweep)
## Warning: package 'sweep' was built under R version 3.3.3
# Read The Data
# obtain data on Total Nonfarm Payroll Employment, Not Seasonally Adjusted
data.tbl <-
tq_get("FRED/PAYNSA",
get = "Quandl",
from = "1975-01-01",
to = "2017-12-12") %>%
rename(E = value)
str(data.tbl)
## Classes 'tbl_df', 'tbl' and 'data.frame': 516 obs. of 2 variables:
## $ date: Date, format: "1975-01-01" "1975-02-01" ...
## $ E : num 76189 75764 75808 76142 76774 ...
## - attr(*, "freq")= chr "monthly"
# Connstruct the following transformed time seriesÂ
# 1. change in Total Nonfarm Payroll Employment dE = E-E(-1)
# 2. log of Total Nonfarm Payroll Employment lE=log(E)Â
# 3. log change in Total Nonfarm Payroll Employment dlE =lE-lE(-1)Â
# 4. 12 month log change in Total Nonfarm Payroll Employment d12lE =lE-lE(-12)Â
# 5. twice differenced Total Nonfarm Payroll Employment dd12lE=d12lE-d12lE(-1)Â
data.tbl <-
tq_get("FRED/PAYNSA",
get = "Quandl",
from = "1975-01-01",
to = "2017-12-12") %>%
rename(E = value) %>%
mutate(dE = E - lag(E),
lE = log(E),
dlE = lE - lag(lE),
d12lE = lE - lag(lE, 12),
dd12lE = d12lE - lag(d12lE))
## Warning: package 'bindrcpp' was built under R version 3.3.3
str(data.tbl)
## Classes 'tbl_df', 'tbl' and 'data.frame': 516 obs. of 7 variables:
## $ date : Date, format: "1975-01-01" "1975-02-01" ...
## $ E : num 76189 75764 75808 76142 76774 ...
## $ dE : num NA -425 44 334 632 496 -678 528 606 557 ...
## $ lE : num 11.2 11.2 11.2 11.2 11.2 ...
## $ dlE : num NA -0.005594 0.000581 0.004396 0.008266 ...
## $ d12lE : num NA NA NA NA NA NA NA NA NA NA ...
## $ dd12lE: num NA NA NA NA NA NA NA NA NA NA ...
# Plot the origion and the transformed time series, command on thier trends, volatility, and seasonal patterns
# set default theme for ggplot2
theme_set(theme_bw())
# plot time series: levels, logs, differences
data.tbl %>%
gather(variable, value, -date) %>%
mutate(variable.f = factor(variable, ordered = TRUE,
levels = c("E", "lE", "dE", "dlE", "d12lE", "dd12lE"),
labels = c("E", "log(E)",
expression(paste(Delta,"E")),
expression(paste(Delta,"log(E)")),
expression(paste(Delta[12],"log(E)")),
expression(paste(Delta,Delta[12],"log(E)"))))) %>%
ggplot(aes(x = date, y = value)) +
geom_hline(aes(yintercept = 0), linetype = "dotted") +
geom_line() +
scale_x_yearmon() +
labs(x = "", y = "") +
# facet_wrap(~variable.f, ncol = 3, scales = "free_y")
facet_wrap(~variable.f, ncol = 3, scales = "free_y", labeller = label_parsed)

# Plot ACF and PACF for lE, dlE, d12lE, dd12lE. Comment on their shape
maxlag <-60
par(mfrow=c(2,4))
Acf(data.tbl$lE, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for log(E)")))
Acf(data.tbl$dlE, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for ", Delta,"log(E)")))
Acf(data.tbl$d12lE, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for ", Delta[12], "log(E)")))
Acf(data.tbl$dd12lE, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for ", Delta, Delta[12], "log(E)")))
Acf(data.tbl$lE, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("PACF for log(E)")))
Acf(data.tbl$dlE, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("PACF for ", Delta, "log(E)")))
Acf(data.tbl$d12lE, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("PACF for ", Delta[12], "log( E)")))
Acf(data.tbl$dd12lE, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("PACF for ", Delta,Delta[12], "log(E)")))

# Perform the ADF and KPSS tests on lE, d12lE, dd12lE. Summarize the results
# for lE
lE = ts(data.tbl$E, start=1975, frequency=12)
library(urca)
## Warning: package 'urca' was built under R version 3.3.3
ur.df(lE, type="trend") %>% summary()
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3686.7 -96.0 274.8 593.5 1454.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.174e+03 7.365e+02 2.952 0.00331 **
## z.lag.1 -2.439e-02 8.914e-03 -2.736 0.00643 **
## tt 3.035e+00 1.203e+00 2.522 0.01198 *
## z.diff.lag 1.021e-01 4.396e-02 2.323 0.02059 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 945 on 510 degrees of freedom
## Multiple R-squared: 0.02339, Adjusted R-squared: 0.01764
## F-statistic: 4.071 on 3 and 510 DF, p-value: 0.007112
##
##
## Value of test-statistic is: -2.7361 5.7223 3.9196
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.96 -3.41 -3.12
## phi2 6.09 4.68 4.03
## phi3 8.27 6.25 5.34
ur.kpss(lE, type = "mu") %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 7.2113
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# for d12lE
d12lE = ts(data.tbl$d12lE, start=1975, frequency=12)
ur.df(na.omit(d12lE), type="drift") %>% summary()
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0116788 -0.0011131 0.0000672 0.0010520 0.0150024
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0001836 0.0001221 1.503 0.1335
## z.lag.1 -0.0130988 0.0051238 -2.556 0.0109 *
## z.diff.lag 0.4829601 0.0385611 12.525 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002097 on 499 degrees of freedom
## Multiple R-squared: 0.2426, Adjusted R-squared: 0.2396
## F-statistic: 79.93 on 2 and 499 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -2.5565 3.2843
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.43 -2.86 -2.57
## phi1 6.43 4.59 3.78
ur.kpss(d12lE, type = "mu") %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 1.1682
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# for dd12lE
dd12lE = ts(data.tbl$dd12lE, start=1975, frequency=12)
ur.df(na.omit(dd12lE), type="drift") %>% summary()
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0106539 -0.0010717 0.0000114 0.0010181 0.0117633
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.589e-05 8.778e-05 -0.295 0.768
## z.lag.1 -3.501e-01 4.213e-02 -8.310 9.16e-16 ***
## z.diff.lag -3.576e-01 4.115e-02 -8.689 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001965 on 498 degrees of freedom
## Multiple R-squared: 0.3671, Adjusted R-squared: 0.3645
## F-statistic: 144.4 on 2 and 498 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -8.3098 34.5598
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.43 -2.86 -2.57
## phi1 6.43 4.59 3.78
ur.kpss(dd12lE, type = "mu") %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0356
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Summary The Results
# The results of ADF test and KPSS show that LE is stationary at level, and d12lE is not stationary.
# The results of ADF test for dd12lE show that it is stationary at level, but the results of
# KPSS aaccept the null hypothesis menaing that the series is non-stationary at level and need to take difference.
# Split the sample into two parts: estimation sample from 1975M1 to 2014M12, and prediction sample from 2015M1 to 2017M12.Â
# Use ACF and PACF from (c) to identify and estimate a suitable model for dd12lE using Arima.
# Check the estimated model for adequacyÂ
# plot inverted AR and MA roots to check stationarity and invertibility using plot, diagnose residuals using ggtsdiag.Â
## split sample into two parts - estimation sample and prediction sample
dd12lE1 <-window(dd12lE, start=1976 +(2/12), end = 2014, frequency=12) # Series in Sub-Sample One
autoplot(dd12lE1, main = "Twice differenced Total Nonfarm Payroll Employment in Sub-Sample One, 1976M2-2014M12")

dd12lE2 <-window(dd12lE, start=2015, end = 2017, frequency=12) # Series in Sub-Sample two
autoplot(dd12lE2, main = "Twice differenced Total Nonfarm Payroll Employment in Sub-Sample two, 2015M1-2017M12")

## find the best model using ARIMA modelÂ
maxlag <-60
par(mfrow=c(2,1))
Acf(dd12lE1, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for ", Delta, Delta[12], "log(E)")))
Acf(dd12lE1, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("PACF for ", Delta,Delta[12], "log(E)")))

ur.df(na.omit(dd12lE1), type="drift") %>% summary()
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0106809 -0.0011277 0.0000061 0.0010229 0.0117647
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0000290 0.0000953 -0.304 0.761
## z.lag.1 -0.3436655 0.0443251 -7.753 6.03e-14 ***
## z.diff.lag -0.3587606 0.0434891 -8.249 1.77e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002028 on 450 degrees of freedom
## Multiple R-squared: 0.3631, Adjusted R-squared: 0.3602
## F-statistic: 128.2 on 2 and 450 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -7.7533 30.0819
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
ur.kpss(dd12lE1, type = "mu") %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0415
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
library(tseries)
m1 <- arima(dd12lE1, order=c(1,0,0), seasonal=list(order=c(1,0,1), period = 12))
m1
##
## Call:
## arima(x = dd12lE1, order = c(1, 0, 0), seasonal = list(order = c(1, 0, 1), period = 12))
##
## Coefficients:
## ar1 sar1 sma1 intercept
## 0.5322 0.1803 -0.8287 0e+00
## s.e. 0.0421 0.0610 0.0330 1e-04
##
## sigma^2 estimated as 3.382e-06: log likelihood = 2214.83, aic = -4419.65
m2 <- arima(dd12lE1, order=c(2,0,0), seasonal=list(order=c(1,0,1), period = 12))
m2
##
## Call:
## arima(x = dd12lE1, order = c(2, 0, 0), seasonal = list(order = c(1, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 sar1 sma1 intercept
## 0.3426 0.3802 0.1412 -0.8071 0e+00
## s.e. 0.0441 0.0442 0.0610 0.0334 1e-04
##
## sigma^2 estimated as 2.911e-06: log likelihood = 2249.02, aic = -4486.03
m3 <- arima(dd12lE1, order=c(3,0,0), seasonal=list(order=c(1,0,1), period = 12))
m3
##
## Call:
## arima(x = dd12lE1, order = c(3, 0, 0), seasonal = list(order = c(1, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sar1 sma1 intercept
## 0.2594 0.3103 0.2134 0.1336 -0.8243 0e+00
## s.e. 0.0469 0.0460 0.0478 0.0593 0.0318 1e-04
##
## sigma^2 estimated as 2.783e-06: log likelihood = 2258.77, aic = -4503.55
# Based on AIC, model 3, ARIMA(3,0,0)(1,0,1)[12] is the best model.
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.3.3
# M3 has significant coefficients, then we do not need to restrict model.
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 5.3220e-01 4.2133e-02 12.6313 < 2.2e-16 ***
## sar1 1.8026e-01 6.1048e-02 2.9528 0.003149 **
## sma1 -8.2871e-01 3.3029e-02 -25.0905 < 2.2e-16 ***
## intercept -4.4195e-05 6.4474e-05 -0.6855 0.493051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 3.4258e-01 4.4071e-02 7.7733 7.645e-15 ***
## ar2 3.8019e-01 4.4209e-02 8.5997 < 2.2e-16 ***
## sar1 1.4124e-01 6.0956e-02 2.3171 0.0205 *
## sma1 -8.0710e-01 3.3438e-02 -24.1375 < 2.2e-16 ***
## intercept -3.0683e-05 8.6363e-05 -0.3553 0.7224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 2.5938e-01 4.6929e-02 5.5270 3.257e-08 ***
## ar2 3.1028e-01 4.5997e-02 6.7456 1.524e-11 ***
## ar3 2.1339e-01 4.7789e-02 4.4652 8.001e-06 ***
## sar1 1.3358e-01 5.9330e-02 2.2515 0.02435 *
## sma1 -8.2431e-01 3.1771e-02 -25.9450 < 2.2e-16 ***
## intercept -2.4330e-05 9.5052e-05 -0.2560 0.79798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Accuracy
accuracy(m3)
## ME RMSE MAE MPE MAPE
## Training set -8.931781e-07 0.001668286 0.001200889 31.87307 252.4614
## MASE ACF1
## Training set 0.6797329 -0.009641745
# Inverted AR and MA roots
plot(m3)

# Diagnose residulas using ggtsdiag
nlags <- 60
ggtsdiag(m3, gof.lag = nlags)

# Based on the residuals, m3 has highly significant p-value.
# Then, the best model is
# ARIMA(3,0,0)(1,0,1)[12]
# Split the sample into two parts: estimation sample from 1975M1 to 2014M12, and prediction sample from 2015M1 to 2017M12.Â
# Use ACF and PACF from (c) to identify and estimate a suitable model for lE using Arima.
# Check the estimated model for adequacyÂ
# plot inverted AR and MA roots to check stationarity and invertibility using plot, diagnose residuals using ggtsdiag.Â
## split sample into two parts - estimation sample and prediction sample
lE1 <-window(lE, start=1975, end = 2014, frequency=12) # Series in Sub-Sample One
autoplot(lE1, main = "log of Total Nonfarm Payroll Employment in Sub-Sample One, 1975M1-2014M12")

lE2 <-window(lE, start=2015, end = 2017, frequency=12) # Series in Sub-Sample two
autoplot(lE2, main = "log of Total Nonfarm Payroll Employment in Sub-Sample two, 2015M1-2017M12")

## find the best model using ARIMA modelÂ
# We need to test ACF, PACF, ADF, KPSS test in sub-sample one.
maxlag <-60
par(mfrow=c(2,1))
Acf(lE1, type="correlation", lag.max = maxlag, ylab = "", main = expression(paste("ACF for log(E)")))
Acf(lE1, type="partial", lag.max = maxlag, ylab = "", main = expression(paste("ACF for log(E)")))

ur.df(na.omit(lE1), type="drift") %>% summary()
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3615.9 -110.3 279.7 589.4 1541.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 558.304280 263.807269 2.116 0.0348 *
## z.lag.1 -0.003911 0.002293 -1.706 0.0887 .
## z.diff.lag 0.098674 0.046525 2.121 0.0345 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 941.4 on 464 degrees of freedom
## Multiple R-squared: 0.01604, Adjusted R-squared: 0.0118
## F-statistic: 3.781 on 2 and 464 DF, p-value: 0.02349
##
##
## Value of test-statistic is: -1.706 4.8441
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
ur.kpss(lE1, type = "mu") %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 7.6967
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
# Series: lE1
# ARIMA(1,0,4) with non-zero mean
m4 <- auto.arima(lE1, ic="aic", seasonal=TRUE, stationary=TRUE, stepwise=FALSE, approximation=FALSE)
m4
## Series: lE1
## ARIMA(1,0,4) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 mean
## 0.9668 1.0063 1.2164 1.0006 0.8593 112305.553
## s.e. 0.0118 0.0304 0.0359 0.0273 0.0274 6694.842
##
## sigma^2 estimated as 1024561: log likelihood=-3914.27
## AIC=7842.55 AICc=7842.79 BIC=7871.6
# Based on auto.arima, model ARIMA(1,0,4) is the best model in sub-sample one.
# M4 has significant coefficients, then we dont need to restrict model.
coeftest(m4)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 9.6682e-01 1.1790e-02 82.005 < 2.2e-16 ***
## ma1 1.0063e+00 3.0423e-02 33.076 < 2.2e-16 ***
## ma2 1.2164e+00 3.5923e-02 33.861 < 2.2e-16 ***
## ma3 1.0006e+00 2.7269e-02 36.692 < 2.2e-16 ***
## ma4 8.5925e-01 2.7432e-02 31.323 < 2.2e-16 ***
## intercept 1.1231e+05 6.6948e+03 16.775 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Accuracy
accuracy(m4)
## ME RMSE MAE MPE MAPE MASE
## Training set 26.75893 1005.711 749.3217 0.006342968 0.6700129 0.3294454
## ACF1
## Training set -0.2791811
# Inverted AR and MA roots
plot(m4)

# Diagnose residulas using ggtsdiag
nlags <- 60
ggtsdiag(m4, gof.lag = nlags)

# Based on the residuals, m4 has not highly significant p-value, then we need to find the best model
m5 <- arima(lE1, order=c(1,2,1), seasonal=list(order=c(2,1,1), period = 12))
m5
##
## Call:
## arima(x = lE1, order = c(1, 2, 1), seasonal = list(order = c(2, 1, 1), period = 12))
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## -0.1659 -0.5140 0.0595 -0.0260 -0.6735
## s.e. 0.0766 0.0689 0.0701 0.0583 0.0517
##
## sigma^2 estimated as 34425: log likelihood = -3028.66, aic = 6069.31
# Accuracy in both Sub-Samples
accuracy(m5)
## ME RMSE MAE MPE MAPE MASE
## Training set -5.640952 182.8673 137.406 -0.0064589 0.1265387 0.1872851
## ACF1
## Training set -0.007198373
# Inverted AR and MA roots in Sub-Sample OneÂ
plot(m5)

# Diagnose residulas using ggtsdiag in both Sub-SamplesÂ
nlags <- 60
ggtsdiag(m5, gof.lag = nlags)

# Based on the residuals, m5 has highly significant p-value than m4.
# Then the best model is ARIMA(1,2,1)(2,1,1)[12]
# Use the rolling scheme to generate a sequence of 1 period ahead forecasts for the prediction subsample, 2015M1-2018M12 using the same model specification as in (f).
# Multistep Ahead Forecasting
end <- 2014
forecasting.length <- window(lE, start = end + (1/12))
m5.f.h <- forecast(m5, length(forecasting.length))
autoplot(m5.f.h)

# 1 month Ahead Recursive Forecasting
m5.Rol.F <- zoo()
fstQ <- 1975
lstQ <- 2014
for(i in 1:length(lE2)) {
temp <- window(lE, start=fstQ + (i-1)/12, end = lstQ + (i-1)/12)
m5.update <- arima(temp, order = c(1,2,1), seasonal = list(order = c(2,1,1), period = 12))
m5.Rol.F <- c(m5.Rol.F, forecast(m5.update, 1)$mean)
}
m5.Rol.F <- as.ts(m5.Rol.F)
plot(m5.Rol.F)

# Plot the forecast from (g) together with its confidence intervals and the actual data for the period 2008M1-2017M12.
par(mfrow=c(2,1))
plot(m5.f.h, xlim=c(2008,2017), main="Multistep Ahead Forecasts")
lines(m5.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(lE, pch=16, lty="dashed")
plot(m5.Rol.F, xlim=c(2008,2017), main = "1-step Ahead Recursive Forecasts")
lines(m5.Rol.F, type="p", pch=16, lty="solid", col="red")
lines(lE,pch=16, lty="dashed")

# Forecast Error
par(mfrow=c(2,1))
plot(lE - m5.f.h$mean, main = "Forecast Error: Multistep Ahead Forecasts")
abline(h=0, lty="dashed")
plot(lE - m5.Rol.F, main = "Forecast Error: 1-Month Ahead Recursive Forecasts")
abline(h=0, lty="dashed")

# test for accuracy
accuracy(m5.f.h$mean, lE)
## ME RMSE MAE MPE MAPE ACF1
## Test set 84.94892 552.2811 469.5141 0.06464511 0.3263059 0.9066706
## Theil's U
## Test set 0.5260941
accuracy(m5.Rol.F, lE)
## ME RMSE MAE MPE MAPE ACF1
## Test set -8.024886 127.0852 106.4006 -0.00590278 0.07569034 -0.234191
## Theil's U
## Test set 0.1121559
# The results of forecasting show that 1-month Ahead Recursive Forecasting is better than multistep Ahead Forecasting.