library(fpp2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.2
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.6.2
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
library(vars)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
## Loading required package: fBasics
# The data I choose is the consumption changes in US. It flutuates over time, and the magnitude is correlated with time.
cons <- ts(uschange[,'Consumption'][1:(nrow(uschange)-11)], start = c(1970,1),frequency = 4)
test <- ts(uschange[,'Consumption'][(nrow(uschange)-10):nrow(uschange)], end = c(2016,3),frequency = 4)
str(cons)
## Time-Series [1:176] from 1970 to 2014: 0.616 0.46 0.877 -0.274 1.897 ...
cons %>% autoplot()

# First, let's try basic linear regression:
lm <- tslm(cons~trend)
lm.fc <- lm %>% forecast(h=10)
lm.fc %>% autoplot()

checkresiduals(lm)

##
## Breusch-Godfrey test for serial correlation of order up to 8
##
## data: Residuals from Linear regression model
## LM test = 37.066, df = 8, p-value = 1.119e-05
# Then, we can decompose the data:
dec.a <- decompose(cons, type='additive')
autoplot(dec.a)

dec.m <- decompose(cons, type='multiplicative')
autoplot(dec.m)

# It seems that multiplicative model performs a better result. Becuase the magnitude changes with time.
dec <- tslm(cons~trend*season)
dec.fc <- dec %>% forecast(h=10)
dec.fc %>% autoplot()

checkresiduals(dec)

##
## Breusch-Godfrey test for serial correlation of order up to 11
##
## data: Residuals from Linear regression model
## LM test = 41.719, df = 11, p-value = 1.812e-05
# Third, ETS model is suitable to deal with the seasonal data:
ets <- ets(cons)
ets.fc <- ets %>% forecast(h=10)
ets.fc%>% autoplot()

checkresiduals(ets.fc)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,N)
## Q* = 15.493, df = 6, p-value = 0.01675
##
## Model df: 2. Total lags used: 8
# Fourth, after ETS, let's try ARIMA model:
arima <- auto.arima(cons)
arima.fc <- arima %>% forecast(h=10)
arima.fc %>% autoplot()

checkresiduals(arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,3)(1,0,1)[4] with non-zero mean
## Q* = 2.5366, df = 3, p-value = 0.4687
##
## Model df: 7. Total lags used: 10
# Fifth, don't forget GARCH model:
garch_11=garchFit(data=cons, formula = ~garch(1,1), cond.dist="QMLE", trace=FALSE)
garch <- fGarch::predict(garch_11, n.ahead = 10)
garch.fc <- ts(garch$meanForecast, start = c(2014,1), frequency = 4)
cons %>% autoplot()+
garch.fc %>% autolayer()+
ggtitle('garch11 model')

# Last but not least, VAR model can help us to explain and forecast the pattern for consumption:
var <- VAR(uschange[1:(nrow(uschange)-11),]%>%as.ts(), type='both',lag.max=2, season=4)
var.fc <- forecast(var,h=10)$forecast$Consumption$mean %>% ts(start = c(2014,1), frequency = 4)
cons %>% autoplot()+
var.fc %>% autolayer()+
ggtitle('garch11 model')

accuracy(lm.fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.060207e-17 0.6612945 0.4827328 80.02252 194.8755 0.7310951
## Test set 2.085976e-01 0.3140753 0.2473013 20.05354 29.1043 0.3745359
## ACF1 Theil's U
## Training set 0.33213558 NA
## Test set -0.06918467 0.8935025
accuracy(dec.fc, test)
## ME RMSE MAE MPE MAPE
## Training set -2.366810e-17 0.6567161 0.4827563 73.59702 190.17488
## Test set 2.172452e-01 0.3052703 0.2297228 22.71186 25.65666
## MASE ACF1 Theil's U
## Training set 0.7311305 0.34755397 NA
## Test set 0.3479133 -0.01022381 0.898817
accuracy(ets.fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00295636 0.6335965 0.4660425 15.65091 166.00454 0.7058176
## Test set 0.22304651 0.3243418 0.2555147 22.16502 29.85229 0.3869749
## ACF1 Theil's U
## Training set -0.001702631 NA
## Test set -0.049842383 0.9174652
accuracy(arima.fc, test)
## ME RMSE MAE MPE MAPE
## Training set 0.0008168511 0.5937631 0.4474452 70.699823 197.57965
## Test set 0.0041196347 0.2190989 0.1961173 -9.448609 28.93531
## MASE ACF1 Theil's U
## Training set 0.6776522 -0.007285859 NA
## Test set 0.2970180 -0.023962859 0.6050423
accuracy(garch.fc, test)
## ME RMSE MAE MPE MAPE ACF1
## Test set -0.1026841 0.2568888 0.2306437 -26.56899 38.81208 -0.04984238
## Theil's U
## Test set 0.5894331
accuracy(var.fc, test)
## ME RMSE MAE MPE MAPE ACF1
## Test set 0.2076786 0.3221667 0.2493535 19.89164 29.52988 -0.2244067
## Theil's U
## Test set 0.9421272
# For all these six models, arima model has the lowest RMSE, while VAR model has the lowest ACF1. It is believed that arima model and VAR model are the most suitable models for forecasting the change of concumption in US..
test %>% autoplot(series='test')+
lm.fc %>% autolayer(series='lm',PI=FALSE)+
dec.fc %>% autolayer(series='dec',PI=FALSE)+
ets.fc %>% autolayer(series='ets',PI=FALSE)+
arima.fc %>% autolayer(series='arima',PI=FALSE)+
garch.fc %>% autolayer(series='garch')+
var.fc %>% autolayer(series='var')

# This plot shows all the predicted values and acutal test data. None of this predicted line fit the test data very well. But we can find that the arima, Garch, and VAR models catch part of the actual variation better than others.