# Clear the workspace
#Build 1 Naive, 1 Drift, 1 SNAIVE, and 1 ETS model on only four years of the data. Forecast the 5th year. Provide measures that assess the performance of your three models. Which performed the best and why?
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 504357 27.0 1119164 59.8 643711 34.4
## Vcells 895808 6.9 8388608 64.0 1649082 12.6
cat("\f") # Clear the console
library("feasts")
## Warning: package 'feasts' was built under R version 4.1.3
## Loading required package: fabletools
## Warning: package 'fabletools' was built under R version 4.1.3
library("seasonal")
## Warning: package 'seasonal' was built under R version 4.1.3
library("tsibble")
## Warning: package 'tsibble' was built under R version 4.1.3
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library("tsibbledata")
## Warning: package 'tsibbledata' was built under R version 4.1.3
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.1.3
library("forecast")
## Warning: package 'forecast' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
library("fable")
## Warning: package 'fable' was built under R version 4.1.3
library("weathermetrics")
## Warning: package 'weathermetrics' was built under R version 4.1.3
library("fpp3")
## Warning: package 'fpp3' was built under R version 4.1.3
## -- Attaching packages ---------------------------------------------- fpp3 0.5 --
## v tibble 3.1.6 v lubridate 1.8.0
## v tidyr 1.2.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
## x tibble::view() masks seasonal::view()
library("lubridate")
library("zoo")
## Warning: package 'zoo' was built under R version 4.1.3
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("magrittr")
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
#set wd
setwd("C:/Users/polhe/Downloads/")
# load data from EIA
raw_energydata <- read.csv("MER_T04_04.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
#rename the date column
#rename the "YYYYMM" column to "Date"
names(raw_energydata)[names(raw_energydata) == 'YYYYMM'] <- 'Date'
# rename the value column
#rename the value column to express "Billion cubic feet.
names(raw_energydata)[3] <- 'Bcf'
# use zoo to zip together the month and year to make the date
raw_energydata$Date <- as.yearmon(paste(raw_energydata$Year, raw_energydata$Month), "%Y %m")
#my_ts=raw_energydata
str(raw_energydata)
## 'data.frame': 96 obs. of 4 variables:
## $ Month: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : int 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
## $ Bcf : num 725 742 193 -321 -496 ...
## $ Date : 'yearmon' num Jan 2015 Feb 2015 Mar 2015 Apr 2015 ...
summary(raw_energydata)
## Month Year Bcf Date
## Min. : 1.00 Min. :2015 Min. :-496.4270 Min. :2015
## 1st Qu.: 3.75 1st Qu.:2017 1st Qu.:-321.8195 1st Qu.:2017
## Median : 6.50 Median :2018 Median :-167.1390 Median :2019
## Mean : 6.50 Mean :2018 Mean : 0.7012 Mean :2019
## 3rd Qu.: 9.25 3rd Qu.:2020 3rd Qu.: 277.0630 3rd Qu.:2021
## Max. :12.00 Max. :2022 Max. : 993.7560 Max. :2023
#view(raw_energydata)
energy_ts <- ts(raw_energydata, frequency=12, start=c(2015,1))
#view my_ts to make sure tha tthe data looks fine.
#view(energy_ts)
#autoplot(energy_ts)
#make univariate time series
Uv_ts <- energy_ts[,"Bcf"] # returns a vector
train = energy_ts[1:48,]#first 4 years
test = energy_ts[49:60,]#last year
uvtrain <- train[,"Bcf"] # returns a vector
uvtest <- test[,"Bcf"] # returns a vector
#view(train)
#view(Uv_ts )
#autoplot(energy_ts)
#autoplot(Uv_ts)
#view(uvtrain)
#view(uvtest)
plot(uvtrain)
plot(uvtest)
#view(energy_ts)
#From plotting the data it looks like this is clearly a seasonal pattern.
#now lets try to figure out which model can best forecast the data.
#ETS model
energy_ets <- ets(uvtrain, model = "AAN")
autoplot(forecast(energy_ets))
autoplot(decompose(energy_ts))
summary(energy_ets)
## ETS(A,A,N)
##
## Call:
## ets(y = uvtrain, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0177
##
## Initial states:
## l = 570.5196
## b = -62.4643
##
## sigma: 294.1555
##
## AIC AICc BIC
## 737.3155 738.7441 746.6715
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 43.88572 281.6324 219.3024 109.2878 192.2281 1.02763 0.3965559
#ETS model
energy_ets <- ets(uvtrain, model = "AAN")
autoplot(forecast(energy_ets))
summary(energy_ets)
## ETS(A,A,N)
##
## Call:
## ets(y = uvtrain, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.0177
##
## Initial states:
## l = 570.5196
## b = -62.4643
##
## sigma: 294.1555
##
## AIC AICc BIC
## 737.3155 738.7441 746.6715
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 43.88572 281.6324 219.3024 109.2878 192.2281 1.02763 0.3965559
#Naive
forecast_naive <- c(NA, uvtrain[-length(uvtrain)])
plot(forecast_naive)
MAE = mean(abs(uvtrain-forecast_naive), na.rm=T)
print(MAE)
## [1] 213.4059
#Drift
drift <- auto.arima(uvtrain, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(drift)
## Series: uvtrain
## ARIMA(2,0,2) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 1.4863 -0.7555 -0.5015 -0.4295
## s.e. 0.1064 0.1083 0.1690 0.1881
##
## sigma^2 = 35931: log likelihood = -319.47
## AIC=648.95 AICc=650.37 BIC=658.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 11.63519 181.4845 145.8458 36.49248 63.76128 0.6834197 -0.03156419
accuracy(drift)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 11.63519 181.4845 145.8458 36.49248 63.76128 0.6834197 -0.03156419
#SNAIVE
snaive_model=SNAIVE(uvtrain)
#fc=fit %>%forecast(uvtest)
#report(fit)
#Im a little confused as to why I am having this problem using the “report(fit)” function, The ETS model did okay given its low MASE, but something to also look at is the alpha of this model is very close to 1. That means that in terms of forecasting power it is similar to a random walk process. What stands out the most to me is the Drift model, it has a fairly low MASE at just 0.6834197, a high RMSE and MAE at 181.5 and 145.8 respectively.