I chose to use the EIA data on U.S. wind power generation over the years. Energy data interests me because it relates to my job, and it also shows what is actually happening in the industry. The wind generation data is in thousands of megawatt hours; for reference, the average household uses 10.7 MWh annually, according to the EIA.
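To get a feel for the units, a quick back-of-the-envelope conversion (the 10,000 figure is hypothetical, chosen only to illustrate the scale):
# A value of 10,000 in the data = 10,000 thousand MWh = 10,000,000 MWh.
# At 10.7 MWh per household per year, that is roughly 935,000 household-years:
10000 * 1000 / 10.7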
# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 434045 23.2 920657 49.2 638942 34.2
## Vcells 795992 6.1 8388608 64.0 1633063 12.5
cat("\f") # Clear the console
library("feasts")
## Loading required package: fabletools
library("seasonal")
library("tsibble")
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library("tsibbledata")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("forecast")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("fable")
library("weathermetrics")
library("fpp3")
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.1 v lubridate 1.8.0
## v tidyr 1.1.4
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
## x tibble::view() masks seasonal::view()
library("lubridate")
# Set working directory
setwd("/Users/spoll/OneDrive/Documents/Boston College/Predictive Analytics_Forecasting/Week 4")
# Load the datasets
raw_energydata <- read.csv("Net_wind_generation_United_States_monthly.csv",
                           check.names = FALSE,
                           stringsAsFactors = FALSE,
                           na.strings = "")
# read.csv() already returns a data frame, so no further conversion is needed
# (wrapping it in data.frame() would also re-check the column names)
# rename wind output column
names(raw_energydata)[2] <- 'mwhs'
raw_energydata$Month <- as.Date(raw_energydata$Month)
# Create train and test data frames (hold out the last 6 months as a test set)
wind_train <- head(raw_energydata, nrow(raw_energydata)-6) %>%
dplyr::select("Month", "mwhs")
wind_test <- tail(raw_energydata, 6) %>%
dplyr::select("Month", "mwhs")
# ETS model
wind_ts <- ts(wind_train$mwhs, frequency = 12)
wind_test_ts <- ts(wind_test$mwhs, frequency = 12)
plot(wind_ts)
wind_ets <- ets(wind_ts, model = 'AAN')
wind_ets
## ETS(A,A,N)
##
## Call:
## ets(y = wind_ts, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9031
## beta = 1e-04
##
## Initial states:
## l = 410.8856
## b = 88.6213
##
## sigma: 2164.469
##
## AIC AICc BIC
## 5160.672 5160.921 5178.219
wind_f <- forecast(wind_ets)
plot(wind_ts ~ wind_f$fitted, main = "ETS Model 1")
autoplot(wind_f)
wind_ets2 <- ets(wind_ts, model = 'MNN')
wind_ets2
## ETS(M,N,N)
##
## Call:
## ets(y = wind_ts, model = "MNN")
##
## Smoothing parameters:
## alpha = 0.8656
##
## Initial states:
## l = 384.4365
##
## sigma: 0.1735
##
## AIC AICc BIC
## 4798.847 4798.946 4809.376
wind_f2 <- forecast(wind_ets2)
plot(wind_ts ~ wind_f2$fitted, main = "ETS Model 2")
autoplot(wind_f2)
# ARIMA Models
wind_ar1 <- auto.arima(wind_ts)
summary(wind_ar1)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f <- forecast(wind_ar1)
plot(wind_ar_f)
autoplot(wind_ar_f)
# Note: seasonal, allowdrift, and stepwise are already the auto.arima() defaults,
# so this call runs the same search as wind_ar1 and selects the same model
wind_ar2 <- auto.arima(wind_ts, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(wind_ar2)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f2 <- forecast(wind_ar2, 12)
autoplot(wind_ar_f2)
plot(wind_ar_f2, include = 12) # show only the last 12 observations with the forecast
The ARIMA model seemed to be the best of the group: it had the lowest training error statistics across the board, with the exception of MPE, where the second ETS model was lower. (The accuracy outputs below are collected into a single comparison table in the sketch that follows them.)
accuracy(wind_f)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.8312652 2146.871 1353.318 -3.283664 13.58344 0.7724523
## ACF1
## Training set 0.005648053
accuracy(wind_f2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 102.7595 2150.411 1349.662 0.4191599 12.97825 0.7703656 0.03938794
accuracy(wind_ar_f)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar_f2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
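To make the side-by-side comparison easier to read, the training-set rows above can be stacked into a single table. A minimal sketch, using the forecast objects already defined:
# Combine the training-set accuracy rows into one comparison table
acc_table <- rbind(accuracy(wind_f), accuracy(wind_f2),
                   accuracy(wind_ar_f), accuracy(wind_ar_f2))
rownames(acc_table) <- c("ETS(A,A,N)", "ETS(M,N,N)", "ARIMA 1", "ARIMA 2")
round(acc_table, 3)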
I had some issues comparing the forecasts with the test data; the code I was trying is below. If anyone has tips, I'd greatly appreciate them. (A possible fix is sketched after the code.)
# Issue comparing forecast and test
# accuracy(wind_f, wind_test_ts)
# accuracy(wind_f2, wind_test_ts)
# accuracy(wind_ar_f, wind_test_ts)
# accuracy(wind_ar_f2, wind_test_ts)
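My best guess at the cause, since the error message isn't shown above: both ts objects were created with the default start = 1, so wind_test_ts occupies the same time window as the training series rather than the six months after it, and accuracy() cannot line it up with the forecast horizon. Two things that should work (a sketch, not tested against this exact data):
# Option 1: pass the test values as a plain vector; accuracy() then compares
# them against the first six forecast values
accuracy(wind_f, wind_test$mwhs)

# Option 2: rebuild the test series so its time index starts one month
# after the training series ends
wind_test_ts <- ts(wind_test$mwhs,
                   frequency = 12,
                   start = tsp(wind_ts)[2] + 1/12)
accuracy(wind_ar_f, wind_test_ts)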