I chose to use the EIA data on monthly U.S. wind power generation over the years. Energy data interests me because it relates to my job, and it also shows what is actually happening in the industry. The wind generation values are in thousands of megawatt hours; for reference, the average household uses about 10.7 MWh annually, according to the EIA.
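To get a rough sense of scale for one unit in the data, here is a quick back-of-the-envelope calculation using the EIA household figure above:
# One unit in the dataset is 1,000 MWh; roughly how many households' annual use is that?
1000 / 10.7 # about 93 average households per thousand MWh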
# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
cat("\f") # Clear the console
library("feasts")
library("seasonal")
library("tsibble")
library("tsibbledata")
library("dplyr")
library("ggplot2")
library("forecast")
library("fable")
library("weathermetrics")
library("fpp3")
library("lubridate")
library("fGarch")
library("rugarch")
# Set working directory
setwd("/Users/spoll/OneDrive/Documents/Boston College/Predictive Analytics_Forecasting/Week 4")
# Load the datasets
raw_energydata <- read.csv("Net_wind_generation_United_States_monthly.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_energydata <- data.frame(raw_energydata)
#
#class(raw_energydata$Month)
#raw_energydata$Month <- as_date(raw_energydata$Month)
#raw_wind <- raw_energydata
#class(raw_wind$Month)
#raw_wind %>%
# mutate(Month=lubridate::dmy(Month))
#
# rename wind output column
names(raw_energydata)[2] <- 'mwhs'
raw_energydata$Month <- as.Date(raw_energydata$Month)
# Create test and train data frames
wind_train <- head(raw_energydata, nrow(raw_energydata)-6) %>%
dplyr::select("Month", "mwhs")
wind_test <- tail(raw_energydata, 6) %>%
dplyr::select("Month", "mwhs")
Below I compare the different models. I referenced Professor Larry’s notes on GARCH, but the “garch” function from the notes no longer seems to work with the “fGarch” package, so I switched to rugarch. I was able to fit the GARCH model and plot some diagnostic graphs, but I’m struggling to figure out how to weigh those diagnostics against the other models to determine the optimum model (I sketched one idea for a test-set comparison after the ARIMA forecasts below). I see that it reports the omega, alpha, and beta estimates. I did notice that the sum of these “weights” is greater than 1, which feels incorrect to me, since I thought they should sum to 1.
# ETS model
wind_ts <- ts(wind_train$mwhs, frequency = 12)
wind_test_ts <- ts(wind_test$mwhs, frequency = 12)
plot(wind_ts)
wind_ets <- ets(wind_ts, model = 'AAN')
wind_ets
## ETS(A,A,N)
##
## Call:
## ets(y = wind_ts, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9031
## beta = 1e-04
##
## Initial states:
## l = 410.8856
## b = 88.6213
##
## sigma: 2164.469
##
## AIC AICc BIC
## 5160.672 5160.921 5178.219
wind_f <- forecast(wind_ets)
# Actual vs. fitted values for the first ETS model
plot(wind_ts ~ wind_f$fitted, main = "ETS Model 1", xlab = "Fitted", ylab = "Actual")
autoplot(wind_f)
wind_ets2 <- ets(wind_ts, model = 'MNN')
wind_ets2
## ETS(M,N,N)
##
## Call:
## ets(y = wind_ts, model = "MNN")
##
## Smoothing parameters:
## alpha = 0.8656
##
## Initial states:
## l = 384.4365
##
## sigma: 0.1735
##
## AIC AICc BIC
## 4798.847 4798.946 4809.376
wind_f2 <- forecast(wind_ets2)
# Actual vs. fitted values for the second ETS model
plot(wind_ts ~ wind_f2$fitted, main = "ETS Model 2", xlab = "Fitted", ylab = "Actual")
autoplot(wind_f2)
# ARIMA Models
wind_ar1 <- auto.arima(wind_ts)
summary(wind_ar1)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f <- forecast(wind_ar1)
plot(wind_ar_f)
autoplot(wind_ar_f)
# seasonal, allowdrift, and stepwise are already the auto.arima() defaults, so this matches wind_ar1
wind_ar2 <- auto.arima(wind_ts, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(wind_ar2)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f2 <- forecast(wind_ar2, 12)
autoplot(wind_ar_f2)
plot(wind_ar_f2, 12)
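Going back to my question above about picking the optimum model: one idea (just a sketch, not something I ran as part of the output above) is to forecast the six held-out months from each fit and score them against the test set with accuracy(). This assumes the wind_ets and wind_ar1 objects fit above and the raw test values in wind_test$mwhs.
# Sketch: score the ETS and ARIMA fits on the 6-month hold-out
ets_acc <- accuracy(forecast(wind_ets, h = 6), wind_test$mwhs)
arima_acc <- accuracy(forecast(wind_ar1, h = 6), wind_test$mwhs)
ets_acc["Test set", c("RMSE", "MAE", "MAPE")]
arima_acc["Test set", c("RMSE", "MAE", "MAPE")]
My thinking is that whichever model has the lower test-set RMSE/MAE would be the better candidate, rather than comparing AIC values across different model classes.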
# GARCH Model
wind_ts <- scale(wind_ts) # standardize the series before fitting the GARCH model
#wind_garch=garchFit(data = wind_ts, formula = garch(1,1), cond.dist = "QMLE", trace = FALSE)
#coef(wind_garch)
# summary(wind_garch)
# plot(wind_garch$residuals)
# wind_fc = predict(wind_garch, 6)
# plot(wind_fc$meanForecast, type = "l")
# GARCH spec: zero-mean, no ARMA terms; rugarch defaults the variance model to sGARCH(1,1)
spec <- ugarchspec(mean.model = list(armaOrder = c(0, 0), include.mean = FALSE))
wind_garch2 <- ugarchfit(spec, data = wind_ts)
show(wind_garch2)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(0,0,0)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## omega 0.040492 0.015998 2.53110 0.011371
## alpha1 0.901107 0.160480 5.61507 0.000000
## beta1 0.076058 0.125816 0.60452 0.545498
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## omega 0.040492 0.008722 4.6422 0.000003
## alpha1 0.901107 0.072187 12.4830 0.000000
## beta1 0.076058 0.072426 1.0502 0.293645
##
## LogLikelihood : -284.7811
##
## Information Criteria
## ------------------------------------
##
## Akaike 2.3302
## Bayes 2.3728
## Shibata 2.3299
## Hannan-Quinn 2.3474
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 197.2 0
## Lag[2*(p+q)+(p+q)-1][2] 289.2 0
## Lag[4*(p+q)+(p+q)-1][5] 539.3 0
## d.o.f=0
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.772 0.1831
## Lag[2*(p+q)+(p+q)-1][5] 4.832 0.1670
## Lag[4*(p+q)+(p+q)-1][9] 6.690 0.2265
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 2.572 0.500 2.000 0.10876
## ARCH Lag[5] 5.540 1.440 1.667 0.07697
## ARCH Lag[7] 5.857 2.315 1.543 0.15115
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 0.7322
## Individual Statistics:
## omega 0.1433
## alpha1 0.6814
## beta1 0.3951
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 0.846 1.01 1.35
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 2.881 0.004317 ***
## Negative Sign Bias 1.305 0.192966
## Positive Sign Bias 1.551 0.122327
## Joint Effect 10.237 0.016657 **
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 787.3 1.101e-154
## 2 30 805.2 9.398e-151
## 3 40 1181.8 1.636e-222
## 4 50 926.4 2.527e-162
##
##
## Elapsed time : 0.08595109
#plot(wind_garch2)
coef(wind_garch2)
## omega alpha1 beta1
## 0.04049156 0.90110732 0.07605847
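On the question I raised above about the “weights”: I think the condition to check is that alpha1 + beta1 is less than 1 (that keeps the variance process stationary); omega is the constant term and is not part of that sum, which is probably why my total came out above 1. A quick check using the fitted coefficients:
# Check GARCH(1,1) persistence: alpha1 + beta1 should be < 1 (omega is excluded)
garch_coefs <- coef(wind_garch2)
garch_coefs["alpha1"] + garch_coefs["beta1"] # about 0.977, so the condition holds
persistence(wind_garch2) # rugarch's built-in persistence measure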
summary(wind_garch2)
## Length Class Mode
## 1 uGARCHfit S4
#wind_fc <- predict(wind_garch2,1)
par(mfrow = c(3, 4))
for (i in 1:12) {
plot(wind_garch2, which = i)
}
##
## please wait...calculating quantiles...
#accuracy(wind_garch2)
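For forecasting from the rugarch fit, I believe ugarchforecast() is the replacement for the predict() call I commented out above. A sketch of what I think that would look like (on the scaled series), plus a plot of the in-sample conditional volatility:
# Sketch: 6-step-ahead forecast from the fitted GARCH model (series was scaled above)
wind_garch_fc <- ugarchforecast(wind_garch2, n.ahead = 6)
fitted(wind_garch_fc) # forecast of the series mean (zero-mean model, so essentially 0)
sigma(wind_garch_fc) # forecast of the conditional volatility
par(mfrow = c(1, 1)) # reset the plotting grid from the diagnostic loop above
plot(as.numeric(sigma(wind_garch2)), type = "l",
     main = "In-sample conditional volatility", ylab = "sigma")
Because the mean model here is a constant zero and the series was standardized, these forecasts are not directly comparable to the ETS/ARIMA forecasts in MWh; as far as I understand it, the GARCH fit is really describing the volatility rather than the level of the series.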
I definitely struggled with interpreting the GARCH model, but it seems like an interesting concept since it models the volatility rather than the level of the series. Please, if anyone has any critiques, I am all ears. A lot of you are much better coders and interpreters of these models, so I appreciate any feedback.