library(zoo)
library(tseries)
library(fBasics)
library(forecast)
library(lmtest)
library(fUnitRoots)
setwd("~/Google Drive/csc 425 Final Project/R Code/")
source("backtest.R")
gas <- read.csv("monthly-gasoline-demand-ontario.csv")
gasTS <- ts(gas$demand, start = c(1960, 1), freq = 12)
Time Plot
plot(gasTS, xlab = "Time", ylab = "Demand",
main = "Monthly gasoline demand Ontario gallon millions 1960-1975")
Histogram and Q-Q plot
par(mfcol = c(1,2))
hist(gas$demand, xlab = "Demand", prob = TRUE, main = "Histogram")
xfit <- seq(min(gas$demand), max(gas$demand), length = 40)
yfit <- dnorm(xfit, mean = mean(gas$demand), sd = sd(gas$demand))
lines(xfit, yfit, col = 2, lwd = 2)
qqnorm(gas$demand, main="Q-Q plot")
qqline(gas$demand, col = 2)
par(mfcol = c(1,1))
Basic Statistics
basicStats(gas$demand)
## X..gas.demand
## nobs 1.920000e+02
## NAs 0.000000e+00
## Minimum 8.689000e+04
## Maximum 2.559180e+05
## 1. Quartile 1.284255e+05
## 3. Quartile 1.935558e+05
## Mean 1.620637e+05
## Median 1.574590e+05
## Sum 3.111623e+07
## SE Mean 3.006687e+03
## LCL Mean 1.561331e+05
## UCL Mean 1.679943e+05
## Variance 1.735712e+09
## Stdev 4.166187e+04
## Skewness 3.199480e-01
## Kurtosis -7.665780e-01
ACF & PACF
acf(gas$demand, plot = TRUE, main = "ACF of gasoline demand")
pacf(gas$demand, plot = TRUE, main = "PACF of gasoline demand")
Ljung Box Test
Box.test(gas$demand, lag = 1, type = "Ljung")
##
## Box-Ljung test
##
## data: gas$demand
## X-squared = 166.59, df = 1, p-value < 2.2e-16
Box.test(gas$demand, lag = 3, type = "Ljung")
##
## Box-Ljung test
##
## data: gas$demand
## X-squared = 438.9, df = 3, p-value < 2.2e-16
Box.test(gas$demand, lag = 5, type = "Ljung")
##
## Box-Ljung test
##
## data: gas$demand
## X-squared = 623.1, df = 5, p-value < 2.2e-16
Dickey Fuller test
adfTest(gas$demand, lags = 1, type = c("c"))
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## Dickey-Fuller: -2.2212
## P VALUE:
## 0.231
##
## Description:
## Tue Nov 3 02:33:01 2015 by user:
adfTest(gas$demand, lags = 3, type = c("c"))
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 3
## STATISTIC:
## Dickey-Fuller: -3.0674
## P VALUE:
## 0.03288
##
## Description:
## Tue Nov 3 02:33:01 2015 by user:
adfTest(gas$demand, lags = 5, type = c("c"))
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 5
## STATISTIC:
## Dickey-Fuller: -1.4595
## P VALUE:
## 0.5139
##
## Description:
## Tue Nov 3 02:33:01 2015 by user:
Detrend and Deseasonalized
detrend <- diff(gas$demand)
acf(detrend, plot = TRUE, main = "ACF of first differencing")
deseason <- diff(detrend, 12)
acf(deseason, plot = TRUE, main = "ACF of seasonal differencing")
Model 1
auto.arima(gasTS, ic = "bic", seasonal = FALSE)
## Series: gasTS
## ARIMA(4,1,1) with drift
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 drift
## 0.4371 0.3398 0.0001 -0.5563 -0.9427 673.0640
## s.e. 0.0599 0.0679 0.0674 0.0598 0.0208 61.4402
##
## sigma^2 estimated as 108124137: log likelihood=-2039.62
## AIC=4093.24 AICc=4093.85 BIC=4116.01
m1 <- Arima(gasTS ,order = c(4,1,1), fixed = c(NA, NA, 0, NA, NA, NA),
include.drift = TRUE, method="ML")
m1
## Series: gasTS
## ARIMA(4,1,1) with drift
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 drift
## 0.4371 0.3398 0 -0.5563 -1.0608 673.0667
## s.e. 0.0590 0.0639 0 0.0491 0.0234 61.4375
##
## sigma^2 estimated as 96080978: log likelihood=-2039.62
## AIC=4091.24 AICc=4091.7 BIC=4110.76
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.437081 0.058995 7.4087 1.275e-13 ***
## ar2 0.339850 0.063893 5.3190 1.043e-07 ***
## ar4 -0.556263 0.049120 -11.3245 < 2.2e-16 ***
## ma1 -1.060822 0.023409 -45.3170 < 2.2e-16 ***
## drift 673.066730 61.437464 10.9553 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m1$residuals, main = "ACF of residuals")
Model 2
auto.arima(gasTS, ic = "bic")
## Series: gasTS
## ARIMA(2,1,0)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 sma1
## -0.9805 -0.6705 -0.6790
## s.e. 0.0556 0.0553 0.0758
##
## sigma^2 estimated as 33058468: log likelihood=-1808.08
## AIC=3624.15 AICc=3624.38 BIC=3636.9
m2 <- Arima(gasTS ,order = c(2,1,0),
seasonal = list(order = c(0,1,1), period = 12),
method="ML")
m2
## Series: gasTS
## ARIMA(2,1,0)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 sma1
## -0.9805 -0.6705 -0.6790
## s.e. 0.0556 0.0553 0.0758
##
## sigma^2 estimated as 33058471: log likelihood=-1808.08
## AIC=3624.15 AICc=3624.38 BIC=3636.9
coeftest(m2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.980466 0.055597 -17.6353 < 2.2e-16 ***
## ar2 -0.670529 0.055278 -12.1301 < 2.2e-16 ***
## sma1 -0.678968 0.075815 -8.9556 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m2$residuals, main = "ACF of residuals")
Model 3
auto.arima(gasTS)
## Series: gasTS
## ARIMA(2,1,3)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1
## -1.0455 -0.8392 0.0045 -0.0224 -0.3864 -0.7207
## s.e. 0.0861 0.0987 0.1498 0.0888 0.1617 0.0756
##
## sigma^2 estimated as 30849704: log likelihood=-1802.59
## AIC=3619.19 AICc=3619.84 BIC=3641.5
m3 <- Arima(gasTS ,order = c(2,1,3), fixed = c(NA, NA, 0, 0, NA, NA),
seasonal = list(order = c(0,1,1), period = 12),
method="ML")
m3
## Series: gasTS
## ARIMA(2,1,3)(0,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 sma1
## -1.0393 -0.8432 0 0 -0.3921 -0.7225
## s.e. 0.0414 0.0539 0 0 0.1045 0.0714
##
## sigma^2 estimated as 30864529: log likelihood=-1802.63
## AIC=3615.25 AICc=3615.6 BIC=3631.19
coeftest(m3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -1.039259 0.041369 -25.1219 < 2.2e-16 ***
## ar2 -0.843183 0.053950 -15.6291 < 2.2e-16 ***
## ma3 -0.392088 0.104508 -3.7518 0.0001756 ***
## sma1 -0.722492 0.071375 -10.1224 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
acf(m3$residuals, main = "ACF of residuals")
par(mfcol = c(1,2))
hist(m3$residuals, xlab = "Residuals", prob = TRUE,
main = "Histogram of residuals")
xfit <- seq(min(m3$residuals), max(m3$residuals), length = 40)
yfit <- dnorm(xfit, mean = mean(m3$residuals), sd = sd(m3$residuals))
lines(xfit, yfit, col = 2, lwd = 2)
qqnorm(m3$residuals, main="Q-Q plot of residuals")
qqline(m3$residuals, col = 2)
par(mfcol = c(1,1))
Box.test(m3$residuals, lag = 6, type = "Ljung", fitdf = 4)
##
## Box-Ljung test
##
## data: m3$residuals
## X-squared = 1.9886, df = 2, p-value = 0.37
Box.test(m3$residuals, lag = 8, type = "Ljung", fitdf = 4)
##
## Box-Ljung test
##
## data: m3$residuals
## X-squared = 7.6767, df = 4, p-value = 0.1042
Box.test(m3$residuals, lag = 10, type = "Ljung", fitdf = 4)
##
## Box-Ljung test
##
## data: m3$residuals
## X-squared = 10.196, df = 6, p-value = 0.1166
f3 <- forecast.Arima(m3, h = 24)
plot(f3)
backtest(m3, gas$demand, 163, 1)
## [1] "RMSE of out-of-sample forecasts"
## [1] 9856.358
## [1] "Mean absolute error of out-of-sample forecasts"
## [1] 8492.045
## [1] "Mean Absolute Percentage error"
## [1] 0.03730783
## [1] "Symmetric Mean Absolute Percentage error"
## [1] 0.03803024