library(fpp)
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.4.4
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.4.4
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.4.4
## Loading required package: ggplot2
## 
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec
setwd("/Users/brendanobrien/Downloads")
PIG.data=read.csv("/Users/brendanobrien/Downloads/monthly-total-number-of-pigs-sla.csv", header = T)
PIG.data$Month = NULL
PIG.ts<- ts(data = PIG.data, start = c(1980,1), end = c(1995,8), frequency = 12, ts.eps = getOption("ts.eps"), class ="ts")
as.ts(PIG.ts)
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 1980  76378  71947  33873  96428 105084  95741 110647 100331  94133 103055
## 1981  76889  81291  91643  96228 102736 100264 103491  97027  95240  91680
## 1982  76892  85773  95210  93771  98202  97906 100306  94089 102680  77919
## 1983  81225  88357 106175  91922 104114 109959  97880 105386  96479  97580
## 1984  90974  98981 107188  94177 115097 113696 114532 120110  93607 110925
## 1985 103069 103351 111331 106161 111590  99447 101987  85333  86970 100561
## 1986  82719  79498  74846  73819  77029  78446  86978  75878  69571  75722
## 1987  63292  59380  78332  72381  55971  69750  85472  70133  79125  85805
## 1988  69069  79556  88174  66698  72258  73445  76131  86082  75443  73969
## 1989  66269  73776  80034  70694  81823  75640  75540  82229  75345  77034
## 1990  75982  78074  77588  84100  97966  89051  93503  84747  74531  91900
## 1991  81022  78265  77271  85043  95418  79568 103283  95770  91297 101244
## 1992  93866  95171 100183 103926 102643 108387  97077  90901  90336  88732
## 1993  73292  78943  94399  92937  90130  91055 106062 103560 104075 101783
## 1994  82413  83534 109011  96499 102430 103002  91815  99067 110067 101599
## 1995  88905  89936 106723  84307 114896 106749  87892 100506              
##         Nov    Dec
## 1980  90595 101457
## 1981 101259 109564
## 1982  93561 117062
## 1983 109490 110191
## 1984 103312 120184
## 1985  89543  89265
## 1986  64182  77357
## 1987  81778  86852
## 1988  78139  78646
## 1989  78589  79769
## 1990  81635  89797
## 1991 114525 101139
## 1992  83759  99267
## 1993  93791 102313
## 1994  97646 104930
## 1995
autoplot(PIG.ts)

ggseasonplot(PIG.ts)

ggsubseriesplot(PIG.ts)

gglagplot(PIG.ts)

ggAcf(PIG.ts)

mydec <- decompose(PIG.ts)
plot(mydec)

###ETS Model

PIG.ets <- ets(PIG.ts)
summary(PIG.ets)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = PIG.ts) 
## 
##   Smoothing parameters:
##     alpha = 0.3095 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 92791.1661 
##     s = 6826.521 -181.8789 991.7917 -1546.851 2155.181 5843.908
##            1723.405 3923.781 -2662.907 1882.368 -7339.192 -11616.13
## 
##   sigma:  9271.526
## 
##      AIC     AICc      BIC 
## 4434.551 4437.342 4483.097 
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE    MAPE      MASE
## Training set 67.41596 8919.631 6406.822 -1.095397 7.74615 0.6410738
##                     ACF1
## Training set 0.009262554
plot(forecast((PIG.ets), h = 12), xlab = "Time", ylab = "Pigs")

ARIMA

PIG.ARIMA <- auto.arima(PIG.ts)
summary(PIG.ARIMA)
## Series: PIG.ts 
## ARIMA(2,1,0)(2,0,0)[12] 
## 
## Coefficients:
##           ar1      ar2    sar1    sar2
##       -0.6538  -0.4952  0.3886  0.1744
## s.e.   0.0666   0.0703  0.0812  0.0878
## 
## sigma^2 estimated as 84366977:  log likelihood=-1972.03
## AIC=3954.06   AICc=3954.39   BIC=3970.21
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE    MAPE      MASE
## Training set 55.55903 9062.184 6916.883 -0.776272 7.97301 0.6921111
##                   ACF1
## Training set 0.0309038
plot(forecast((PIG.ARIMA), h = 12), xlab = "Time", ylab = "Pigs")

The ARIMA model performed slightly better according to AIC, however the ETS model performed better according to RMSE.

Test Set Forecast

Auto.ARIMA Model
PIGts.train <- window(PIG.ts, end=c(1995,2))
PIGts.test <- window(PIG.ts, start=c(1995,3))
autoplot(PIG.ts) +
  autolayer(PIGts.train, series="Training") +
  autolayer(PIGts.test, series="Test")

PIGS.AA <- auto.arima(PIGts.train)

AA.forecast  <-forecast(PIGS.AA, h=6)
  autoplot(AA.forecast) + autolayer(PIGts.test)

accuracy(AA.forecast,PIGts.test)
##                     ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -104.9471 9083.374 6889.179 -1.05957977 8.038704 0.6797730
## Test set     1002.1045 9238.212 7924.024 -0.01173183 8.072235 0.7818838
##                      ACF1 Theil's U
## Training set  0.003114103        NA
## Test set     -0.415655171 0.4875635
checkresiduals(AA.forecast)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,2)(1,0,0)[12] with drift
## Q* = 16.161, df = 18, p-value = 0.5813
## 
## Model df: 6.   Total lags used: 24

The residuals appear uncorrelated and normal

The accuracy metrics appear to be slightly better for the training set

ETS Model
PIGS.ETS <- ets(PIGts.train)

ETS.forecast  <-forecast(PIGS.ETS, h=6)
  autoplot(ETS.forecast) + autolayer(PIGts.test)

accuracy(ETS.forecast,PIGts.test)
##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  100.7263  8800.972 6273.295 -1.050488 7.631907 0.6190021
## Test set     -614.9443 10522.987 9051.627 -1.775599 9.392463 0.8931473
##                     ACF1 Theil's U
## Training set  0.02304835        NA
## Test set     -0.37362076 0.5225442
checkresiduals(ETS.forecast)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,A)
## Q* = 36.01, df = 10, p-value = 8.384e-05
## 
## Model df: 14.   Total lags used: 24

The residuals appear to be uncorrelated and relatively normal.

The accuracy metrics are much better for the training set in this model, which could mean over-fitting. The metrics for the auto.ARIMA model are better than the metrics for this model. The auto.ARIMA model is better for forecasting.