Required Library

library(zoo)
library(tseries)
library(fBasics)
library(forecast)
library(lmtest)
library(fUnitRoots)

Data Import and Preparation

setwd("~/Google Drive/csc 425 Final Project/R Code/")
source("backtest.R")
gas <- read.csv("monthly-gasoline-demand-ontario.csv")
gasTS <- ts(gas$demand, start = c(1960, 1), freq = 12)

Data Exploratory

Time Plot

plot(gasTS, xlab = "Time", ylab = "Demand", 
     main = "Monthly gasoline demand Ontario gallon millions 1960-1975")

Histogram and Q-Q plot

par(mfcol = c(1,2))
hist(gas$demand, xlab = "Demand", prob = TRUE, main = "Histogram")
xfit <- seq(min(gas$demand), max(gas$demand), length = 40)
yfit <- dnorm(xfit, mean = mean(gas$demand), sd = sd(gas$demand))
lines(xfit, yfit, col = 2, lwd = 2)
qqnorm(gas$demand, main="Q-Q plot")
qqline(gas$demand, col = 2)

par(mfcol = c(1,1))

Basic Statistics

basicStats(gas$demand)
##             X..gas.demand
## nobs         1.920000e+02
## NAs          0.000000e+00
## Minimum      8.689000e+04
## Maximum      2.559180e+05
## 1. Quartile  1.284255e+05
## 3. Quartile  1.935558e+05
## Mean         1.620637e+05
## Median       1.574590e+05
## Sum          3.111623e+07
## SE Mean      3.006687e+03
## LCL Mean     1.561331e+05
## UCL Mean     1.679943e+05
## Variance     1.735712e+09
## Stdev        4.166187e+04
## Skewness     3.199480e-01
## Kurtosis    -7.665780e-01

Checking for Stationarity and Serial Correlation

ACF & PACF

acf(gas$demand, plot = TRUE, main = "ACF of gasoline demand")

pacf(gas$demand, plot = TRUE, main = "PACF of gasoline demand")

Ljung Box Test

Box.test(gas$demand, lag = 1, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  gas$demand
## X-squared = 166.59, df = 1, p-value < 2.2e-16
Box.test(gas$demand, lag = 3, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  gas$demand
## X-squared = 438.9, df = 3, p-value < 2.2e-16
Box.test(gas$demand, lag = 5, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  gas$demand
## X-squared = 623.1, df = 5, p-value < 2.2e-16

Dickey Fuller test

adfTest(gas$demand, lags = 1, type = c("c"))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -2.2212
##   P VALUE:
##     0.231 
## 
## Description:
##  Tue Nov  3 02:33:01 2015 by user:
adfTest(gas$demand, lags = 3, type = c("c"))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 3
##   STATISTIC:
##     Dickey-Fuller: -3.0674
##   P VALUE:
##     0.03288 
## 
## Description:
##  Tue Nov  3 02:33:01 2015 by user:
adfTest(gas$demand, lags = 5, type = c("c"))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 5
##   STATISTIC:
##     Dickey-Fuller: -1.4595
##   P VALUE:
##     0.5139 
## 
## Description:
##  Tue Nov  3 02:33:01 2015 by user:

Detrend and Deseasonalized

detrend <- diff(gas$demand)
acf(detrend, plot = TRUE, main = "ACF of first differencing")

deseason <- diff(detrend, 12)
acf(deseason, plot = TRUE, main = "ACF of seasonal differencing")

Modeling

Model 1

auto.arima(gasTS, ic = "bic", seasonal = FALSE)
## Series: gasTS 
## ARIMA(4,1,1) with drift         
## 
## Coefficients:
##          ar1     ar2     ar3      ar4      ma1     drift
##       0.4371  0.3398  0.0001  -0.5563  -0.9427  673.0640
## s.e.  0.0599  0.0679  0.0674   0.0598   0.0208   61.4402
## 
## sigma^2 estimated as 108124137:  log likelihood=-2039.62
## AIC=4093.24   AICc=4093.85   BIC=4116.01
m1 <- Arima(gasTS ,order = c(4,1,1), fixed = c(NA, NA, 0, NA, NA, NA),
            include.drift = TRUE, method="ML")
m1
## Series: gasTS 
## ARIMA(4,1,1) with drift         
## 
## Coefficients:
##          ar1     ar2  ar3      ar4      ma1     drift
##       0.4371  0.3398    0  -0.5563  -1.0608  673.0667
## s.e.  0.0590  0.0639    0   0.0491   0.0234   61.4375
## 
## sigma^2 estimated as 96080978:  log likelihood=-2039.62
## AIC=4091.24   AICc=4091.7   BIC=4110.76
coeftest(m1)
## 
## z test of coefficients:
## 
##         Estimate Std. Error  z value  Pr(>|z|)    
## ar1     0.437081   0.058995   7.4087 1.275e-13 ***
## ar2     0.339850   0.063893   5.3190 1.043e-07 ***
## ar4    -0.556263   0.049120 -11.3245 < 2.2e-16 ***
## ma1    -1.060822   0.023409 -45.3170 < 2.2e-16 ***
## drift 673.066730  61.437464  10.9553 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m1$residuals, main = "ACF of residuals")

Model 2

auto.arima(gasTS, ic = "bic")
## Series: gasTS 
## ARIMA(2,1,0)(0,1,1)[12]                    
## 
## Coefficients:
##           ar1      ar2     sma1
##       -0.9805  -0.6705  -0.6790
## s.e.   0.0556   0.0553   0.0758
## 
## sigma^2 estimated as 33058468:  log likelihood=-1808.08
## AIC=3624.15   AICc=3624.38   BIC=3636.9
m2 <- Arima(gasTS ,order = c(2,1,0), 
            seasonal = list(order = c(0,1,1), period = 12), 
            method="ML")
m2
## Series: gasTS 
## ARIMA(2,1,0)(0,1,1)[12]                    
## 
## Coefficients:
##           ar1      ar2     sma1
##       -0.9805  -0.6705  -0.6790
## s.e.   0.0556   0.0553   0.0758
## 
## sigma^2 estimated as 33058471:  log likelihood=-1808.08
## AIC=3624.15   AICc=3624.38   BIC=3636.9
coeftest(m2)
## 
## z test of coefficients:
## 
##       Estimate Std. Error  z value  Pr(>|z|)    
## ar1  -0.980466   0.055597 -17.6353 < 2.2e-16 ***
## ar2  -0.670529   0.055278 -12.1301 < 2.2e-16 ***
## sma1 -0.678968   0.075815  -8.9556 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m2$residuals, main = "ACF of residuals")

Model 3

auto.arima(gasTS)
## Series: gasTS 
## ARIMA(2,1,3)(0,1,1)[12]                    
## 
## Coefficients:
##           ar1      ar2     ma1      ma2      ma3     sma1
##       -1.0455  -0.8392  0.0045  -0.0224  -0.3864  -0.7207
## s.e.   0.0861   0.0987  0.1498   0.0888   0.1617   0.0756
## 
## sigma^2 estimated as 30849704:  log likelihood=-1802.59
## AIC=3619.19   AICc=3619.84   BIC=3641.5
m3 <- Arima(gasTS ,order = c(2,1,3), fixed = c(NA, NA, 0, 0, NA, NA),
            seasonal = list(order = c(0,1,1), period = 12), 
            method="ML")
m3
## Series: gasTS 
## ARIMA(2,1,3)(0,1,1)[12]                    
## 
## Coefficients:
##           ar1      ar2  ma1  ma2      ma3     sma1
##       -1.0393  -0.8432    0    0  -0.3921  -0.7225
## s.e.   0.0414   0.0539    0    0   0.1045   0.0714
## 
## sigma^2 estimated as 30864529:  log likelihood=-1802.63
## AIC=3615.25   AICc=3615.6   BIC=3631.19
coeftest(m3)
## 
## z test of coefficients:
## 
##       Estimate Std. Error  z value  Pr(>|z|)    
## ar1  -1.039259   0.041369 -25.1219 < 2.2e-16 ***
## ar2  -0.843183   0.053950 -15.6291 < 2.2e-16 ***
## ma3  -0.392088   0.104508  -3.7518 0.0001756 ***
## sma1 -0.722492   0.071375 -10.1224 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m3$residuals, main = "ACF of residuals")

par(mfcol = c(1,2))
hist(m3$residuals, xlab = "Residuals", prob = TRUE, 
     main = "Histogram of residuals")
xfit <- seq(min(m3$residuals), max(m3$residuals), length = 40)
yfit <- dnorm(xfit, mean = mean(m3$residuals), sd = sd(m3$residuals))
lines(xfit, yfit, col = 2, lwd = 2)
qqnorm(m3$residuals, main="Q-Q plot of residuals")
qqline(m3$residuals, col = 2)

par(mfcol = c(1,1))
Box.test(m3$residuals, lag = 6, type = "Ljung", fitdf = 4)
## 
##  Box-Ljung test
## 
## data:  m3$residuals
## X-squared = 1.9886, df = 2, p-value = 0.37
Box.test(m3$residuals, lag = 8, type = "Ljung", fitdf = 4)
## 
##  Box-Ljung test
## 
## data:  m3$residuals
## X-squared = 7.6767, df = 4, p-value = 0.1042
Box.test(m3$residuals, lag = 10, type = "Ljung", fitdf = 4)
## 
##  Box-Ljung test
## 
## data:  m3$residuals
## X-squared = 10.196, df = 6, p-value = 0.1166

Forecasting

f3 <- forecast.Arima(m3, h = 24)
plot(f3)

Backtesting

backtest(m3, gas$demand, 163, 1)
## [1] "RMSE of out-of-sample forecasts"
## [1] 9856.358
## [1] "Mean absolute error of out-of-sample forecasts"
## [1] 8492.045
## [1] "Mean Absolute Percentage error"
## [1] 0.03730783
## [1] "Symmetric Mean Absolute Percentage error"
## [1] 0.03803024