library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.4.4
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.4.4
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.4
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.4.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.4
library(timeSeries)
## Warning: package 'timeSeries' was built under R version 3.4.4
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.4.4
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
library(forecast)
library(xts)


souvenir <- scan("http://robjhyndman.com/tsdldata/data/fancy.dat")
souvenir <- ts(souvenir, frequency=12, start=c(1987,1))

Step 1: Plot the data

plot(souvenir)

The seasonal peaks increase over time so we use a transformation to make them of similar height.

log_souvenir = log(souvenir)
plot(log_souvenir)

plot.ts(filter(log_souvenir, sides=2, filter=rep(1/3,3)))

(wgts <- c(.5, rep(1,11), .5)/12)
##  [1] 0.04166667 0.08333333 0.08333333 0.08333333 0.08333333 0.08333333
##  [7] 0.08333333 0.08333333 0.08333333 0.08333333 0.08333333 0.08333333
## [13] 0.04166667
plot.ts(filter(log_souvenir, sides=2, filter = wgts))

Step 2: Diffs

Diff1 <- diff(log_souvenir, lag = 12)
plot.ts(Diff1) 

adf.test(Diff1) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Diff1
## Dickey-Fuller = -2.6959, Lag order = 4, p-value = 0.292
## alternative hypothesis: stationary

D = 1

diff1 <- diff(Diff1, lag = 1)
plot.ts(diff1)

adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -5.3558, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

d = 1

Step 3

Acf(diff1)

q=1, Q = 0 (or maybe 1)

Pacf(diff1)

p=1, P = 0

Step 4: Estimate model

mod1 <- arima(log_souvenir, order = c(1,1,1), season = list( order = c( 0,1,0), period=12))

Step 5

summary(mod1)
## 
## Call:
## arima(x = log_souvenir, order = c(1, 1, 1), seasonal = list(order = c(0, 1, 
##     0), period = 12))
## 
## Coefficients:
##          ar1      ma1
##       0.2020  -0.7403
## s.e.  0.2614   0.2064
## 
## sigma^2 estimated as 0.03695:  log likelihood = 16.09,  aic = -26.17
## 
## Training set error measures:
##                        ME      RMSE       MAE        MPE     MAPE
## Training set -0.007853116 0.1767525 0.1334457 -0.1052367 1.455206
##                   MASE        ACF1
## Training set 0.3309956 -0.03525604
Box.test(mod1$residuals, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  mod1$residuals
## X-squared = 0.10818, df = 1, p-value = 0.7422
automod <- auto.arima(log_souvenir)
automod
## Series: log_souvenir 
## ARIMA(2,0,0)(1,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     ar2     sar1   drift
##       0.3493  0.3602  -0.3278  0.0247
## s.e.  0.1086  0.1159   0.1334  0.0044
## 
## sigma^2 estimated as 0.03182:  log likelihood=23.04
## AIC=-36.09   AICc=-35.18   BIC=-24.71