Load

library(readxl)
library(httr)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble      3.0.6       v tsibble     0.9.3  
## v dplyr       1.0.4       v tsibbledata 0.2.0  
## v tidyr       1.1.2       v feasts      0.1.7  
## v lubridate   1.7.9.2     v fable       0.3.0  
## v ggplot2     3.3.3
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()   masks base::date()
## x dplyr::filter()     masks stats::filter()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag()        masks stats::lag()
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast  8.13     v expsmooth 2.3 
## v fma       2.4
## 
## 
## Attaching package: 'fpp2'
## The following object is masked from 'package:fpp3':
## 
##     insurance
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v readr   1.4.0     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.1
## Warning: package 'stringr' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x kableExtra::group_rows() masks dplyr::group_rows()
## x lubridate::intersect()   masks base::intersect()
## x tsibble::interval()      masks lubridate::interval()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
myurl="https://www.eia.gov/dnav/ng/hist_xls/N3010US2m.xls"
GET(myurl, write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://www.eia.gov/dnav/ng/hist_xls/N3010US2m.xls]
##   Date: 2021-04-20 00:30
##   Status: 200
##   Content-Type: application/vnd.ms-excel
##   Size: 52.7 kB
## <ON DISK>  C:\Users\LAWREN~1\AppData\Local\Temp\RtmpkrJET6\file45545b86410c.xls
mydata=read_excel(tf, 2L) #second sheet
temp=as.numeric(unlist(mydata[2]))
## Warning: NAs introduced by coercion
temp=temp[-c(1:2)] #two extra blank fields
temp=temp[-length(temp)]

Train / Validation

myts=ts(temp, start=c(1973,1), frequency=12)
train=ts(temp[1:540],start=c(1973,1), frequency=12)
test=ts(temp[541:576], start=c(2018,1), frequency=12)

Plots

autoplot(myts)

autoplot(acf(myts))

autoplot(pacf(myts))

Model Evaluation

ndiffs(myts)
## [1] 0
nsdiffs(myts)
## [1] 1
myts %>% diff(lag=12) %>% ggtsdisplay()

Auto Arima

myarima=auto.arima(train)
myets=ets(train)
mypredict=forecast(myarima, 36)
mypredict2=forecast(myets, 36)

Residual Tests

for (i in 1:30){
  print(Box.test(myarima$residuals,lag=i,type='Ljung-Box'))
}
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 0.0017012, df = 1, p-value = 0.9671
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 0.24325, df = 2, p-value = 0.8855
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 1.5889, df = 3, p-value = 0.6619
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 1.5932, df = 4, p-value = 0.81
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 1.8654, df = 5, p-value = 0.8674
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 1.8688, df = 6, p-value = 0.9314
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 1.8699, df = 7, p-value = 0.9667
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 2.0237, df = 8, p-value = 0.9803
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 2.1203, df = 9, p-value = 0.9894
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 2.3637, df = 10, p-value = 0.9927
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 7.0443, df = 11, p-value = 0.7955
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 7.8904, df = 12, p-value = 0.7936
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.1821, df = 13, p-value = 0.8315
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.5581, df = 14, p-value = 0.8583
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.6401, df = 15, p-value = 0.8956
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.691, df = 16, p-value = 0.9256
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.7028, df = 17, p-value = 0.9491
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.703, df = 18, p-value = 0.9662
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.7305, df = 19, p-value = 0.9777
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 8.7848, df = 20, p-value = 0.9853
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 9.3375, df = 21, p-value = 0.9863
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 9.676, df = 22, p-value = 0.989
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 12.374, df = 23, p-value = 0.9643
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 14.654, df = 24, p-value = 0.9305
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 14.72, df = 25, p-value = 0.9477
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 16.638, df = 26, p-value = 0.9196
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 16.686, df = 27, p-value = 0.9386
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 17.449, df = 28, p-value = 0.9392
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 17.45, df = 29, p-value = 0.9547
## 
## 
##  Box-Ljung test
## 
## data:  myarima$residuals
## X-squared = 17.451, df = 30, p-value = 0.9668
checkresiduals(myarima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,0)(1,1,2)[12]
## Q* = 14.654, df = 19, p-value = 0.7443
## 
## Model df: 5.   Total lags used: 24
checkresiduals(myets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,M)
## Q* = 146.15, df = 10, p-value < 2.2e-16
## 
## Model df: 14.   Total lags used: 24

Accuracy

acc1=accuracy(mypredict, test)
acc1%>%kbl(caption="Arima")%>%kable_classic(html_font="Cambria")
Arima
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set -1285.208 41755.53 26621.84 -1.302623 6.005785 0.7204466 -0.0017700 NA
Test set 16405.457 54441.79 36901.37 2.170124 8.704602 0.9986337 0.0947348 0.3175987
acc2=accuracy(mypredict2,test)
acc2%>%kbl(caption="ETS")%>%kable_classic(html_font="Cambria")
ETS
ME RMSE MAE MPE MAPE MASE ACF1 Theil’s U
Training set 2808.369 44218.50 28214.69 -0.8015371 6.518725 0.7635525 0.1974349 NA
Test set 18634.710 52726.81 35843.38 2.9503235 8.396654 0.9700019 0.0423135 0.2897158

Plots

autoplot(mypredict)

autoplot(mypredict2)