I chose to use the EIA data on U.S. wind power generation over the years. Energy data interests me because it relates to my job, and it also shows what is actually happening in the industry. The wind generation data is in thousands of megawatt hours; for reference, the average household uses 10.7 MWh annually, according to the EIA.
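To get a feel for the units, a quick back-of-the-envelope conversion (the 10,000 figure is hypothetical, chosen only to illustrate the scale):
# A value of 10,000 in the data = 10,000 thousand MWh = 10,000,000 MWh.
# At 10.7 MWh per household per year, that is roughly 935,000 household-years:
10000 * 1000 / 10.7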
# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 434045 23.2 920657 49.2 638942 34.2
## Vcells 795992 6.1 8388608 64.0 1633063 12.5
cat("\f") # Clear the console
library("feasts")
## Loading required package: fabletools
library("seasonal")
library("tsibble")
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library("tsibbledata")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("forecast")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("fable")
library("weathermetrics")
library("fpp3")
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.1 v lubridate 1.8.0
## v tidyr 1.1.4
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
## x tibble::view() masks seasonal::view()
library("lubridate")
# Set working directory
setwd("/Users/spoll/OneDrive/Documents/Boston College/Predictive Analytics_Forecasting/Week 4")
# Load the datasets
raw_energydata <- read.csv("Net_wind_generation_United_States_monthly.csv",
                           check.names = FALSE,
                           stringsAsFactors = FALSE,
                           na.strings = "")
# read.csv() already returns a data frame, so no further conversion is needed
# (wrapping it in data.frame() would also re-check the column names)
# rename wind output column
names(raw_energydata)[2] <- 'mwhs'
raw_energydata$Month <- as.Date(raw_energydata$Month)
# Create train and test data frames (hold out the last 6 months as a test set)
wind_train <- head(raw_energydata, nrow(raw_energydata)-6) %>%
dplyr::select("Month", "mwhs")
wind_test <- tail(raw_energydata, 6) %>%
dplyr::select("Month", "mwhs")
# ETS model
wind_ts <- ts(wind_train$mwhs, frequency = 12)
wind_test_ts <- ts(wind_test$mwhs, frequency = 12)
plot(wind_ts)
wind_ets <- ets(wind_ts, model = 'AAN')
wind_ets
## ETS(A,A,N)
##
## Call:
## ets(y = wind_ts, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9031
## beta = 1e-04
##
## Initial states:
## l = 410.8856
## b = 88.6213
##
## sigma: 2164.469
##
## AIC AICc BIC
## 5160.672 5160.921 5178.219
wind_f <- forecast(wind_ets)
plot(wind_ts ~ wind_f$fitted, main = "ETS Model 1")
autoplot(wind_f)
wind_ets2 <- ets(wind_ts, model = 'MNN')
wind_ets2
## ETS(M,N,N)
##
## Call:
## ets(y = wind_ts, model = "MNN")
##
## Smoothing parameters:
## alpha = 0.8656
##
## Initial states:
## l = 384.4365
##
## sigma: 0.1735
##
## AIC AICc BIC
## 4798.847 4798.946 4809.376
wind_f2 <- forecast(wind_ets2)
plot(wind_ts ~ wind_f2$fitted, main = "ETS Model 2")
autoplot(wind_f2)
# ARIMA Models
wind_ar1 <- auto.arima(wind_ts)
summary(wind_ar1)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f <- forecast(wind_ar1)
plot(wind_ar_f)
autoplot(wind_ar_f)
# Note: seasonal, allowdrift, and stepwise are already the auto.arima() defaults,
# so this call runs the same search as wind_ar1 and selects the same model
wind_ar2 <- auto.arima(wind_ts, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(wind_ar2)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f2 <- forecast(wind_ar2, 12)
autoplot(wind_ar_f2)
plot(wind_ar_f2, include = 12) # show only the last 12 observations with the forecast
The ARIMA model seemed to be the best of the group: it had the lowest training error statistics across the board, with the exception of MPE, where the second ETS model was lower. (The accuracy outputs below are collected into a single comparison table in the sketch that follows them.)
accuracy(wind_f)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.8312652 2146.871 1353.318 -3.283664 13.58344 0.7724523
## ACF1
## Training set 0.005648053
accuracy(wind_f2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 102.7595 2150.411 1349.662 0.4191599 12.97825 0.7703656 0.03938794
accuracy(wind_ar_f)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar_f2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
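To make the side-by-side comparison easier to read, the training-set rows above can be stacked into a single table. A minimal sketch, using the forecast objects already defined:
# Combine the training-set accuracy rows into one comparison table
acc_table <- rbind(accuracy(wind_f), accuracy(wind_f2),
                   accuracy(wind_ar_f), accuracy(wind_ar_f2))
rownames(acc_table) <- c("ETS(A,A,N)", "ETS(M,N,N)", "ARIMA 1", "ARIMA 2")
round(acc_table, 3)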
I had some issues comparing the forecasts with the test data; the code I was trying is below. If anyone has tips, I'd greatly appreciate them. (A possible fix is sketched after the code.)
# Issue comparing forecast and test
# accuracy(wind_f, wind_test_ts)
# accuracy(wind_f2, wind_test_ts)
# accuracy(wind_ar_f, wind_test_ts)
# accuracy(wind_ar_f2, wind_test_ts)
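My best guess at the cause, since the error message isn't shown above: both ts objects were created with the default start = 1, so wind_test_ts occupies the same time window as the training series rather than the six months after it, and accuracy() cannot line it up with the forecast horizon. Two things that should work (a sketch, not tested against this exact data):
# Option 1: pass the test values as a plain vector; accuracy() then compares
# them against the first six forecast values
accuracy(wind_f, wind_test$mwhs)

# Option 2: rebuild the test series so its time index starts one month
# after the training series ends
wind_test_ts <- ts(wind_test$mwhs,
                   frequency = 12,
                   start = tsp(wind_ts)[2] + 1/12)
accuracy(wind_ar_f, wind_test_ts)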