Build 1 ETS, 1 ARIMA, 1 NNETAR, and one ensemble model on four years of that data. Forecast the 5th year.
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 519906 27.8 1155251 61.7 660388 35.3
## Vcells 947513 7.3 8388608 64.0 1770124 13.6
cat("\f") # Clear the console
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(fabletools)
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
require(fpp3)
## Loading required package: fpp3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ ggplot2 3.4.2
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
require(ggplot2)
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:fabletools':
##
## accuracy
require(kableExtra)
## Loading required package: kableExtra
## Error: package or namespace load failed for 'kableExtra':
## .onLoad failed in loadNamespace() for 'kableExtra', details:
## call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in%
## error: 'length = 2' in coercion to 'logical(1)'
require(latex2exp)
## Loading required package: latex2exp
require(readxl)
## Loading required package: readxl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.1 ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
#set wd
setwd("C:/Users/polhe/Downloads/")
# load data from NASS
nj_gas <- read.csv("nj_gas_data.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = "")
#view(nj_gas)
nj_gas$Year=seq(as.Date("2017/12/1"), as.Date("2022/12/01"), "months")
#view(nj_gas)
myts=nj_gas%>%
mutate(YearMonth = yearmonth(as.character(Year))) %>%
as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)
#view(nj_gas)
#make sure myts is tsibble
myts_tsbl <- as_tsibble(myts, index = Year)
view(myts_tsbl)
# Perform the ADF test on the time series
adf_result <- adf.test(myts$Amount)
## Warning in adf.test(myts$Amount): p-value smaller than printed p-value
# Print the ADF test results
cat("ADF Test Results:\n")
## ADF Test Results:
cat("Test statistic:", adf_result$statistic, "\n")
## Test statistic: -4.605307
cat("p-value:", adf_result$p.value, "\n")
## p-value: 0.01
cat("Number of lags used:", adf_result$lags, "\n")
## Number of lags used:
cat("Critical values:", adf_result$critical, "\n")
## Critical values:
cat("Data: the time series 'myts$Amount' \n")
## Data: the time series 'myts$Amount'
# Test for stationarity with a quadratic trend using the KPSS test
kpss.test(myts$Amount, null = "Level")
## Warning in kpss.test(myts$Amount, null = "Level"): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: myts$Amount
## KPSS Level = 0.77706, Truncation lag parameter = 3, p-value = 0.01
#test for data type because I need data as a tsibble
class(nj_gas)
## [1] "data.frame"
view(nj_gas)
myts%>%ACF(Amount)%>%autoplot()
#highly seasonal
myts%>%gg_season(Amount)
myts%>%gg_subseries(Amount)
myts%>%gg_lag(Amount)
comp=myts%>%model(stl=STL(Amount))%>%components()
comp|>as_tsibble() |>autoplot(Amount)+ geom_line(aes(y=trend), colour = "red")+
geom_line(aes(y=season_adjust), colour = "blue")
comp%>%autoplot()
#split data
train=myts[1:48,]
test=myts[49:nrow(myts),]
#now that we have read in the data and know that it is stationary, we have split the data. We can now build our 3 models
#models
#models <- list(
#ETS
m1=train |>model(ETS_Model=ETS(Amount))
#ARIMA
m2=train |>model(ARIMA_Model=ARIMA(Amount))
#Ensemble Model
m3=train|>model(Ensemble_Model=(ETS(Amount)+ARIMA(Amount))/2)
#NNEAR
m4=train |>model(NNETAR((Amount)))
#)
for (i in 1:4){report(eval(parse(text=paste0('m',i))))}
## Series: Amount
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.457527
## gamma = 0.0002558804
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 10.46873 -1.402856 -1.329369 -0.8784021 0.6027845 2.034797 2.604881 2.055179
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.097427 -0.433917 -1.411389 -1.387151 -1.551985
##
## sigma^2: 0.2206
##
## AIC AICc BIC
## 126.7151 141.7151 154.7831
## Series: Amount
## Model: ARIMA(1,0,0)(1,1,0)[12] w/ drift
##
## Coefficients:
## ar1 sar1 constant
## 0.3598 -0.6950 0.4895
## s.e. 0.1624 0.1206 0.0844
##
## sigma^2 estimated as 0.2033: log likelihood=-24.86
## AIC=57.73 AICc=59.02 BIC=64.06
## Series: Amount
## Model: COMBINATION
## Combination: Amount * 0.5
##
## =========================
##
## Series: Amount
## Model: COMBINATION
## Combination: Amount + Amount
##
## ============================
##
## Series: Amount
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.457527
## gamma = 0.0002558804
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 10.46873 -1.402856 -1.329369 -0.8784021 0.6027845 2.034797 2.604881 2.055179
## s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.097427 -0.433917 -1.411389 -1.387151 -1.551985
##
## sigma^2: 0.2206
##
## AIC AICc BIC
## 126.7151 141.7151 154.7831
##
## Series: Amount
## Model: ARIMA(1,0,0)(1,1,0)[12] w/ drift
##
## Coefficients:
## ar1 sar1 constant
## 0.3598 -0.6950 0.4895
## s.e. 0.1624 0.1206 0.0844
##
## sigma^2 estimated as 0.2033: log likelihood=-24.86
## AIC=57.73 AICc=59.02 BIC=64.06
##
##
## Series: Amount
## Model: NNAR(5,1,4)[12]
## Transformation: (Amount)
##
## Average of 20 networks, each of which is
## a 6-4-1 network with 33 weights
## options were - linear output units
##
## sigma^2 estimated as 0.005505
myplot=function(model){
model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),data=augment(model))+ggtitle(names(model))}
for (i in 1:3){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot, avoiding that model
library(forecast)
##
## Attaching package: 'forecast'
## The following object is masked from 'package:Metrics':
##
## accuracy
## The following object is masked from 'package:fabletools':
##
## accuracy
#forecast the next year for all models
f1 <- m1 |> forecast(h = 12)
f1 <- m2 |> forecast(h = 12)
f1 <- m3 |> forecast(h = 12)
#f1 <- m4 |> forecast(h = 12)
forecasts <- list(
ETS_Model = ETS(train$Amount),
ARIMA_Model = Arima(train$Amount),
Ensemble_Model = (ETS(train$Amount) + Arima(train$Amount))/2,
NNETAR_Model = nnetar(train$Amount)
)
# Create list of forecasts
#forecasts <- lapply(models, forecast, h = length(test$Amount))
# Extract point forecasts and store as matrix
#forecast_matrix <- sapply(forecasts, function(x) x$mean)
# Generate forecasts for the test set
#forecast_list <- lapply(forecasts, function(model) forecast(model, h = 12))
# Compute accuracy metrics for each forecast
#accuracy_list <- lapply(forecast_list, function(f) {
# accuracy(f, test)
#})
# Print accuracy metrics for each model
#for (i in seq_along(accuracy_list)) {
# cat(paste0("Accuracy metrics for ", names(forecasts)[i], ":\n"))
# print(accuracy_list[[i]])
#}
I had a lot of problems getting the accuracy function to work but I will do my best to compare models based on AIC values. The ETS(A,N,A) had an AIC of 126.7151, while the ARIMA(1,0,0) had an AIC of 57.73,the Neural Network had a very lowsigma^2 estimated as 0.005925. I dont feel gret about the understanding of Neural networks and forecasting so even though I know that the Neural Network is the best model for this sort of work, I would have to go with the ARIMA model because it has a lower AIC value and I feel more comfortable interpreting it and explaining how it works.
Having a lot of issues with getting accuracy metrics to show still.