library(fpp)
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.4.4
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.4.4
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.4.4
## Loading required package: ggplot2
##
## Attaching package: 'fpp2'
## The following objects are masked from 'package:fpp':
##
## ausair, ausbeer, austa, austourists, debitcards, departures,
## elecequip, euretail, guinearice, oil, sunspotarea, usmelec
setwd("/Users/brendanobrien/Downloads")
PIG.data=read.csv("/Users/brendanobrien/Downloads/monthly-total-number-of-pigs-sla.csv", header = T)
PIG.data$Month = NULL
PIG.ts<- ts(data = PIG.data, start = c(1980,1), end = c(1995,8), frequency = 12, ts.eps = getOption("ts.eps"), class ="ts")
as.ts(PIG.ts)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 1980 76378 71947 33873 96428 105084 95741 110647 100331 94133 103055
## 1981 76889 81291 91643 96228 102736 100264 103491 97027 95240 91680
## 1982 76892 85773 95210 93771 98202 97906 100306 94089 102680 77919
## 1983 81225 88357 106175 91922 104114 109959 97880 105386 96479 97580
## 1984 90974 98981 107188 94177 115097 113696 114532 120110 93607 110925
## 1985 103069 103351 111331 106161 111590 99447 101987 85333 86970 100561
## 1986 82719 79498 74846 73819 77029 78446 86978 75878 69571 75722
## 1987 63292 59380 78332 72381 55971 69750 85472 70133 79125 85805
## 1988 69069 79556 88174 66698 72258 73445 76131 86082 75443 73969
## 1989 66269 73776 80034 70694 81823 75640 75540 82229 75345 77034
## 1990 75982 78074 77588 84100 97966 89051 93503 84747 74531 91900
## 1991 81022 78265 77271 85043 95418 79568 103283 95770 91297 101244
## 1992 93866 95171 100183 103926 102643 108387 97077 90901 90336 88732
## 1993 73292 78943 94399 92937 90130 91055 106062 103560 104075 101783
## 1994 82413 83534 109011 96499 102430 103002 91815 99067 110067 101599
## 1995 88905 89936 106723 84307 114896 106749 87892 100506
## Nov Dec
## 1980 90595 101457
## 1981 101259 109564
## 1982 93561 117062
## 1983 109490 110191
## 1984 103312 120184
## 1985 89543 89265
## 1986 64182 77357
## 1987 81778 86852
## 1988 78139 78646
## 1989 78589 79769
## 1990 81635 89797
## 1991 114525 101139
## 1992 83759 99267
## 1993 93791 102313
## 1994 97646 104930
## 1995
autoplot(PIG.ts)
ggseasonplot(PIG.ts)
ggsubseriesplot(PIG.ts)
gglagplot(PIG.ts)
ggAcf(PIG.ts)
mydec <- decompose(PIG.ts)
plot(mydec)
###ETS Model
PIG.ets <- ets(PIG.ts)
summary(PIG.ets)
## ETS(A,N,A)
##
## Call:
## ets(y = PIG.ts)
##
## Smoothing parameters:
## alpha = 0.3095
## gamma = 1e-04
##
## Initial states:
## l = 92791.1661
## s = 6826.521 -181.8789 991.7917 -1546.851 2155.181 5843.908
## 1723.405 3923.781 -2662.907 1882.368 -7339.192 -11616.13
##
## sigma: 9271.526
##
## AIC AICc BIC
## 4434.551 4437.342 4483.097
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 67.41596 8919.631 6406.822 -1.095397 7.74615 0.6410738
## ACF1
## Training set 0.009262554
plot(forecast((PIG.ets), h = 12), xlab = "Time", ylab = "Pigs")
PIG.ARIMA <- auto.arima(PIG.ts)
summary(PIG.ARIMA)
## Series: PIG.ts
## ARIMA(2,1,0)(2,0,0)[12]
##
## Coefficients:
## ar1 ar2 sar1 sar2
## -0.6538 -0.4952 0.3886 0.1744
## s.e. 0.0666 0.0703 0.0812 0.0878
##
## sigma^2 estimated as 84366977: log likelihood=-1972.03
## AIC=3954.06 AICc=3954.39 BIC=3970.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 55.55903 9062.184 6916.883 -0.776272 7.97301 0.6921111
## ACF1
## Training set 0.0309038
plot(forecast((PIG.ARIMA), h = 12), xlab = "Time", ylab = "Pigs")
The ARIMA model performed slightly better according to AIC, however the ETS model performed better according to RMSE.
PIGts.train <- window(PIG.ts, end=c(1995,2))
PIGts.test <- window(PIG.ts, start=c(1995,3))
autoplot(PIG.ts) +
autolayer(PIGts.train, series="Training") +
autolayer(PIGts.test, series="Test")
PIGS.AA <- auto.arima(PIGts.train)
AA.forecast <-forecast(PIGS.AA, h=6)
autoplot(AA.forecast) + autolayer(PIGts.test)
accuracy(AA.forecast,PIGts.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -104.9471 9083.374 6889.179 -1.05957977 8.038704 0.6797730
## Test set 1002.1045 9238.212 7924.024 -0.01173183 8.072235 0.7818838
## ACF1 Theil's U
## Training set 0.003114103 NA
## Test set -0.415655171 0.4875635
checkresiduals(AA.forecast)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,2)(1,0,0)[12] with drift
## Q* = 16.161, df = 18, p-value = 0.5813
##
## Model df: 6. Total lags used: 24
The residuals appear uncorrelated and normal
The accuracy metrics appear to be slightly better for the training set
PIGS.ETS <- ets(PIGts.train)
ETS.forecast <-forecast(PIGS.ETS, h=6)
autoplot(ETS.forecast) + autolayer(PIGts.test)
accuracy(ETS.forecast,PIGts.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 100.7263 8800.972 6273.295 -1.050488 7.631907 0.6190021
## Test set -614.9443 10522.987 9051.627 -1.775599 9.392463 0.8931473
## ACF1 Theil's U
## Training set 0.02304835 NA
## Test set -0.37362076 0.5225442
checkresiduals(ETS.forecast)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 36.01, df = 10, p-value = 8.384e-05
##
## Model df: 14. Total lags used: 24
The residuals appear to be uncorrelated and relatively normal.
The accuracy metrics are much better for the training set in this model, which could mean over-fitting. The metrics for the auto.ARIMA model are better than the metrics for this model. The auto.ARIMA model is better for forecasting.