ADEC_7460

Build 1 ETS, 1 ARIMA, 1 NNETAR, and one ensemble model on four years of that data. Forecast the 5th year.

rm(list = ls()) # Clear environment
gc()            # Clear unused memory

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 519906 27.8    1155251 61.7   660388 35.3
## Vcells 947513  7.3    8388608 64.0  1770124 13.6

cat("\f")       # Clear the console

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(fabletools)
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

require(fpp3)

## Loading required package: fpp3

## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──

## ✔ tibble      3.2.1     ✔ ggplot2     3.4.2
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

require(ggplot2)
library(Metrics)

## 
## Attaching package: 'Metrics'

## The following object is masked from 'package:fabletools':
## 
##     accuracy

require(kableExtra)

## Loading required package: kableExtra

## Error: package or namespace load failed for 'kableExtra':
##  .onLoad failed in loadNamespace() for 'kableExtra', details:
##   call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in% 
##   error: 'length = 2' in coercion to 'logical(1)'

require(latex2exp)

## Loading required package: latex2exp

require(readxl)

## Loading required package: readxl

require(tidyverse)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.1     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()       masks stats::filter()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

require(magrittr)

## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

#set wd
setwd("C:/Users/polhe/Downloads/")


# load data from NASS

nj_gas <- read.csv("nj_gas_data.csv"
                           , check.names = FALSE
                           , stringsAsFactors = FALSE
                           , na.strings = "")
#view(nj_gas)

nj_gas$Year=seq(as.Date("2017/12/1"), as.Date("2022/12/01"), "months")
#view(nj_gas)


myts=nj_gas%>%
  mutate(YearMonth = yearmonth(as.character(Year))) %>%
  as_tsibble(index = YearMonth)
myts%>%autoplot(Amount)

#view(nj_gas)


#make sure myts is tsibble

myts_tsbl <- as_tsibble(myts, index = Year)
view(myts_tsbl)

# Perform the ADF test on the time series
adf_result <- adf.test(myts$Amount)

## Warning in adf.test(myts$Amount): p-value smaller than printed p-value

# Print the ADF test results
cat("ADF Test Results:\n")

## ADF Test Results:

cat("Test statistic:", adf_result$statistic, "\n")

## Test statistic: -4.605307

cat("p-value:", adf_result$p.value, "\n")

## p-value: 0.01

cat("Number of lags used:", adf_result$lags, "\n")

## Number of lags used:

cat("Critical values:", adf_result$critical, "\n")

## Critical values:

cat("Data: the time series 'myts$Amount' \n")

## Data: the time series 'myts$Amount'

# Test for stationarity with a quadratic trend using the KPSS test
kpss.test(myts$Amount, null = "Level")

## Warning in kpss.test(myts$Amount, null = "Level"): p-value smaller than printed
## p-value

## 
##  KPSS Test for Level Stationarity
## 
## data:  myts$Amount
## KPSS Level = 0.77706, Truncation lag parameter = 3, p-value = 0.01

#test for data type because I need data as a tsibble

class(nj_gas)

## [1] "data.frame"

view(nj_gas)

myts%>%ACF(Amount)%>%autoplot()

#highly seasonal

myts%>%gg_season(Amount)

myts%>%gg_subseries(Amount)

myts%>%gg_lag(Amount)

comp=myts%>%model(stl=STL(Amount))%>%components()
comp|>as_tsibble() |>autoplot(Amount)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

#split data

train=myts[1:48,]
test=myts[49:nrow(myts),]

#now that we have read in the data and know that it is stationary, we have split the data. We can now build our 3 models


#models


#models <- list(
#ETS
m1=train |>model(ETS_Model=ETS(Amount))

#ARIMA
m2=train |>model(ARIMA_Model=ARIMA(Amount))
#Ensemble Model


m3=train|>model(Ensemble_Model=(ETS(Amount)+ARIMA(Amount))/2)
#NNEAR

m4=train |>model(NNETAR((Amount)))
#)





for (i in 1:4){report(eval(parse(text=paste0('m',i))))}

## Series: Amount 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.457527 
##     gamma = 0.0002558804 
## 
##   Initial states:
##      l[0]      s[0]     s[-1]      s[-2]     s[-3]    s[-4]    s[-5]    s[-6]
##  10.46873 -1.402856 -1.329369 -0.8784021 0.6027845 2.034797 2.604881 2.055179
##     s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  1.097427 -0.433917 -1.411389 -1.387151 -1.551985
## 
##   sigma^2:  0.2206
## 
##      AIC     AICc      BIC 
## 126.7151 141.7151 154.7831 
## Series: Amount 
## Model: ARIMA(1,0,0)(1,1,0)[12] w/ drift 
## 
## Coefficients:
##          ar1     sar1  constant
##       0.3598  -0.6950    0.4895
## s.e.  0.1624   0.1206    0.0844
## 
## sigma^2 estimated as 0.2033:  log likelihood=-24.86
## AIC=57.73   AICc=59.02   BIC=64.06
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount * 0.5
## 
## =========================
## 
## Series: Amount 
## Model: COMBINATION 
## Combination: Amount + Amount
## 
## ============================
## 
## Series: Amount 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.457527 
##     gamma = 0.0002558804 
## 
##   Initial states:
##      l[0]      s[0]     s[-1]      s[-2]     s[-3]    s[-4]    s[-5]    s[-6]
##  10.46873 -1.402856 -1.329369 -0.8784021 0.6027845 2.034797 2.604881 2.055179
##     s[-7]     s[-8]     s[-9]    s[-10]    s[-11]
##  1.097427 -0.433917 -1.411389 -1.387151 -1.551985
## 
##   sigma^2:  0.2206
## 
##      AIC     AICc      BIC 
## 126.7151 141.7151 154.7831 
## 
## Series: Amount 
## Model: ARIMA(1,0,0)(1,1,0)[12] w/ drift 
## 
## Coefficients:
##          ar1     sar1  constant
##       0.3598  -0.6950    0.4895
## s.e.  0.1624   0.1206    0.0844
## 
## sigma^2 estimated as 0.2033:  log likelihood=-24.86
## AIC=57.73   AICc=59.02   BIC=64.06
## 
## 
## Series: Amount 
## Model: NNAR(5,1,4)[12] 
## Transformation: (Amount) 
## 
## Average of 20 networks, each of which is
## a 6-4-1 network with 33 weights
## options were - linear output units 
## 
## sigma^2 estimated as 0.005505

myplot=function(model){
  model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),data=augment(model))+ggtitle(names(model))}

for (i in 1:3){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot, avoiding that model

library(forecast)

## 
## Attaching package: 'forecast'

## The following object is masked from 'package:Metrics':
## 
##     accuracy

## The following object is masked from 'package:fabletools':
## 
##     accuracy

#forecast the next year for all models
f1 <- m1 |> forecast(h = 12)
f1 <- m2 |> forecast(h = 12)
f1 <- m3 |> forecast(h = 12)
#f1 <- m4 |> forecast(h = 12)

forecasts <- list(
  ETS_Model = ETS(train$Amount),
  ARIMA_Model = Arima(train$Amount),
  Ensemble_Model = (ETS(train$Amount) + Arima(train$Amount))/2,
  NNETAR_Model = nnetar(train$Amount)
)


# Create list of forecasts
#forecasts <- lapply(models, forecast, h = length(test$Amount))

# Extract point forecasts and store as matrix
#forecast_matrix <- sapply(forecasts, function(x) x$mean)

# Generate forecasts for the test set
#forecast_list <- lapply(forecasts, function(model) forecast(model, h = 12))

# Compute accuracy metrics for each forecast
#accuracy_list <- lapply(forecast_list, function(f) {
#  accuracy(f, test)
#})

# Print accuracy metrics for each model
#for (i in seq_along(accuracy_list)) {
 # cat(paste0("Accuracy metrics for ", names(forecasts)[i], ":\n"))
#  print(accuracy_list[[i]])
#}

I had a lot of problems getting the accuracy function to work but I will do my best to compare models based on AIC values. The ETS(A,N,A) had an AIC of 126.7151, while the ARIMA(1,0,0) had an AIC of 57.73,the Neural Network had a very lowsigma^2 estimated as 0.005925. I dont feel gret about the understanding of Neural networks and forecasting so even though I know that the Neural Network is the best model for this sort of work, I would have to go with the ARIMA model because it has a lower AIC value and I feel more comfortable interpreting it and explaining how it works.

Having a lot of issues with getting accuracy metrics to show still.

ADEC_7460_hw3

Hunter P

2023-05-01