adec7460_HW

# Clear the workspace



#Build 1 Naive, 1 Drift, 1 SNAIVE, and 1 ETS model on only four years of the data. Forecast the 5th year. Provide measures that assess the performance of your three models. Which performed the best and why?

rm(list = ls()) # Clear environment
gc()            # Clear unused memory

##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 504357 27.0    1119164 59.8   643711 34.4
## Vcells 895808  6.9    8388608 64.0  1649082 12.6

cat("\f")       # Clear the console

library("feasts")

## Warning: package 'feasts' was built under R version 4.1.3

## Loading required package: fabletools

## Warning: package 'fabletools' was built under R version 4.1.3

library("seasonal")

## Warning: package 'seasonal' was built under R version 4.1.3

library("tsibble")

## Warning: package 'tsibble' was built under R version 4.1.3

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library("tsibbledata")

## Warning: package 'tsibbledata' was built under R version 4.1.3

library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("ggplot2")

## Warning: package 'ggplot2' was built under R version 4.1.3

library("forecast")

## Warning: package 'forecast' was built under R version 4.1.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## 
## Attaching package: 'forecast'

## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

library("fable")

## Warning: package 'fable' was built under R version 4.1.3

library("weathermetrics")

## Warning: package 'weathermetrics' was built under R version 4.1.3

library("fpp3")

## Warning: package 'fpp3' was built under R version 4.1.3

## -- Attaching packages ---------------------------------------------- fpp3 0.5 --

## v tibble    3.1.6     v lubridate 1.8.0
## v tidyr     1.2.0

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()     masks base::date()
## x dplyr::filter()       masks stats::filter()
## x tsibble::intersect()  masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag()          masks stats::lag()
## x tsibble::setdiff()    masks base::setdiff()
## x tsibble::union()      masks base::union()
## x tibble::view()        masks seasonal::view()

library("lubridate")
library("zoo")

## Warning: package 'zoo' was built under R version 4.1.3

## 
## Attaching package: 'zoo'

## The following object is masked from 'package:tsibble':
## 
##     index

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library("magrittr")

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:tidyr':
## 
##     extract

#set wd
setwd("C:/Users/polhe/Downloads/")


# load data from EIA

raw_energydata <- read.csv("MER_T04_04.csv"
                           , check.names = FALSE
                           , stringsAsFactors = FALSE
                           , na.strings = ""
)

#rename the date column 
#rename the "YYYYMM" column to "Date"

names(raw_energydata)[names(raw_energydata) == 'YYYYMM'] <- 'Date'

# rename the value column 
#rename the value column to express "Billion cubic feet. 
names(raw_energydata)[3] <- 'Bcf'


# use zoo to zip together the month and year to make the date
raw_energydata$Date <- as.yearmon(paste(raw_energydata$Year, raw_energydata$Month), "%Y %m")

#my_ts=raw_energydata


  
str(raw_energydata)

## 'data.frame':    96 obs. of  4 variables:
##  $ Month: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year : int  2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
##  $ Bcf  : num  725 742 193 -321 -496 ...
##  $ Date : 'yearmon' num  Jan 2015 Feb 2015 Mar 2015 Apr 2015 ...

summary(raw_energydata)

##      Month            Year           Bcf                 Date     
##  Min.   : 1.00   Min.   :2015   Min.   :-496.4270   Min.   :2015  
##  1st Qu.: 3.75   1st Qu.:2017   1st Qu.:-321.8195   1st Qu.:2017  
##  Median : 6.50   Median :2018   Median :-167.1390   Median :2019  
##  Mean   : 6.50   Mean   :2018   Mean   :   0.7012   Mean   :2019  
##  3rd Qu.: 9.25   3rd Qu.:2020   3rd Qu.: 277.0630   3rd Qu.:2021  
##  Max.   :12.00   Max.   :2022   Max.   : 993.7560   Max.   :2023

#view(raw_energydata)

energy_ts <- ts(raw_energydata, frequency=12, start=c(2015,1))

#view my_ts to make sure tha tthe data looks fine.

#view(energy_ts)
#autoplot(energy_ts)

#make univariate time series

Uv_ts <- energy_ts[,"Bcf"]             # returns a vector


train = energy_ts[1:48,]#first 4 years
test = energy_ts[49:60,]#last year




uvtrain <- train[,"Bcf"]             # returns a vector
uvtest <- test[,"Bcf"]             # returns a vector


#view(train)
#view(Uv_ts )
#autoplot(energy_ts)
#autoplot(Uv_ts)
#view(uvtrain)
#view(uvtest)

plot(uvtrain)

plot(uvtest)

#view(energy_ts)

#From plotting the data it looks like this is clearly a seasonal pattern.
#now lets try to figure out which model can best forecast the data.


#ETS model 
energy_ets <- ets(uvtrain, model = "AAN")
autoplot(forecast(energy_ets))

autoplot(decompose(energy_ts))

summary(energy_ets)

## ETS(A,A,N) 
## 
## Call:
##  ets(y = uvtrain, model = "AAN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0177 
## 
##   Initial states:
##     l = 570.5196 
##     b = -62.4643 
## 
##   sigma:  294.1555
## 
##      AIC     AICc      BIC 
## 737.3155 738.7441 746.6715 
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE    MASE      ACF1
## Training set 43.88572 281.6324 219.3024 109.2878 192.2281 1.02763 0.3965559

#ETS model 
energy_ets <- ets(uvtrain, model = "AAN")
autoplot(forecast(energy_ets))

summary(energy_ets)

## ETS(A,A,N) 
## 
## Call:
##  ets(y = uvtrain, model = "AAN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0177 
## 
##   Initial states:
##     l = 570.5196 
##     b = -62.4643 
## 
##   sigma:  294.1555
## 
##      AIC     AICc      BIC 
## 737.3155 738.7441 746.6715 
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE    MASE      ACF1
## Training set 43.88572 281.6324 219.3024 109.2878 192.2281 1.02763 0.3965559

#Naive
forecast_naive <- c(NA, uvtrain[-length(uvtrain)])
plot(forecast_naive)

MAE = mean(abs(uvtrain-forecast_naive), na.rm=T)

print(MAE)

## [1] 213.4059

#Drift


drift <- auto.arima(uvtrain, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(drift)

## Series: uvtrain 
## ARIMA(2,0,2) with zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1      ma2
##       1.4863  -0.7555  -0.5015  -0.4295
## s.e.  0.1064   0.1083   0.1690   0.1881
## 
## sigma^2 = 35931:  log likelihood = -319.47
## AIC=648.95   AICc=650.37   BIC=658.3
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 11.63519 181.4845 145.8458 36.49248 63.76128 0.6834197 -0.03156419

accuracy(drift)

##                    ME     RMSE      MAE      MPE     MAPE      MASE        ACF1
## Training set 11.63519 181.4845 145.8458 36.49248 63.76128 0.6834197 -0.03156419

#SNAIVE
snaive_model=SNAIVE(uvtrain)




#fc=fit %>%forecast(uvtest)
#report(fit)

#Im a little confused as to why I am having this problem using the “report(fit)” function, The ETS model did okay given its low MASE, but something to also look at is the alpha of this model is very close to 1. That means that in terms of forecasting power it is similar to a random walk process. What stands out the most to me is the Drift model, it has a fairly low MASE at just 0.6834197, a high RMSE and MAE at 181.5 and 145.8 respectively.

adec7460_HW_1

Hunter P

2023-04-02