I chose to use the EIA data on monthly U.S. wind power generation over the years. Energy data interests me because it relates to my job, and it also shows what is actually happening in the industry. The wind generation values are in thousands of megawatt hours; for reference, the average household uses about 10.7 MWh annually, according to the EIA.
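To get a rough sense of scale for one unit in the data, here is a quick back-of-the-envelope calculation using the EIA household figure above:
# One unit in the dataset is 1,000 MWh; roughly how many households' annual use is that?
1000 / 10.7 # about 93 average households per thousand MWh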
# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
cat("\f") # Clear the console
library("feasts")
library("seasonal")
library("tsibble")
library("tsibbledata")
library("dplyr")
library("ggplot2")
library("forecast")
library("fable")
library("weathermetrics")
library("fpp3")
library("lubridate")
library("fGarch")
library("rugarch")
# Set working directory
setwd("/Users/spoll/OneDrive/Documents/Boston College/Predictive Analytics_Forecasting/Week 4")
# Load the datasets
raw_energydata <- read.csv("Net_wind_generation_United_States_monthly.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_energydata <- data.frame(raw_energydata)
#
#class(raw_energydata$Month)
#raw_energydata$Month <- as_date(raw_energydata$Month)
#raw_wind <- raw_energydata
#class(raw_wind$Month)
#raw_wind %>%
# mutate(Month=lubridate::dmy(Month))
#
# rename wind output column
names(raw_energydata)[2] <- 'mwhs'
raw_energydata$Month <- as.Date(raw_energydata$Month)
# Create test and train data frames
wind_train <- head(raw_energydata, nrow(raw_energydata)-6) %>%
dplyr::select("Month", "mwhs")
wind_test <- tail(raw_energydata, 6) %>%
dplyr::select("Month", "mwhs")
Below I compare the different models. I referenced Professor Larry’s notes on GARCH, but the “garch” function from the notes no longer seems to work with the “fGarch” package, so I switched to rugarch. I was able to fit the GARCH model and plot some diagnostic graphs, but I’m struggling to figure out how to weigh those diagnostics against the other models to determine the optimum model (I sketched one idea for a test-set comparison after the ARIMA forecasts below). I see that it reports the omega, alpha, and beta estimates. I did notice that the sum of these “weights” is greater than 1, which feels incorrect to me, since I thought they should sum to 1.
# ETS model
wind_ts <- ts(wind_train$mwhs, frequency = 12)
wind_test_ts <- ts(wind_test$mwhs, frequency = 12)
plot(wind_ts)
wind_ets <- ets(wind_ts, model = 'AAN')
wind_ets
## ETS(A,A,N)
##
## Call:
## ets(y = wind_ts, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9031
## beta = 1e-04
##
## Initial states:
## l = 410.8856
## b = 88.6213
##
## sigma: 2164.469
##
## AIC AICc BIC
## 5160.672 5160.921 5178.219
wind_f <- forecast(wind_ets)
# Actual vs. fitted values for the first ETS model
plot(wind_ts ~ wind_f$fitted, main = "ETS Model 1", xlab = "Fitted", ylab = "Actual")
autoplot(wind_f)
wind_ets2 <- ets(wind_ts, model = 'MNN')
wind_ets2
## ETS(M,N,N)
##
## Call:
## ets(y = wind_ts, model = "MNN")
##
## Smoothing parameters:
## alpha = 0.8656
##
## Initial states:
## l = 384.4365
##
## sigma: 0.1735
##
## AIC AICc BIC
## 4798.847 4798.946 4809.376
wind_f2 <- forecast(wind_ets2)
# Actual vs. fitted values for the second ETS model
plot(wind_ts ~ wind_f2$fitted, main = "ETS Model 2", xlab = "Fitted", ylab = "Actual")
autoplot(wind_f2)
# ARIMA Models
wind_ar1 <- auto.arima(wind_ts)
summary(wind_ar1)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar1)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f <- forecast(wind_ar1)
plot(wind_ar_f)
autoplot(wind_ar_f)
# seasonal, allowdrift, and stepwise are already the auto.arima() defaults, so this matches wind_ar1
wind_ar2 <- auto.arima(wind_ts, seasonal = TRUE, allowdrift = TRUE, stepwise = TRUE)
summary(wind_ar2)
## Series: wind_ts
## ARIMA(0,1,3)(0,1,2)[12]
##
## Coefficients:
## ma1 ma2 ma3 sma1 sma2
## -0.7052 -0.0343 -0.0984 -0.8701 0.2369
## s.e. 0.0703 0.0774 0.0746 0.0923 0.0866
##
## sigma^2 = 2322800: log likelihood = -2050.03
## AIC=4112.05 AICc=4112.42 BIC=4132.78
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
accuracy(wind_ar2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 142.8618 1467.49 952.392 0.9149543 9.482212 0.5436101 -0.01895731
wind_ar_f2 <- forecast(wind_ar2, 12)
autoplot(wind_ar_f2)
plot(wind_ar_f2, 12)
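Going back to my question above about picking the optimum model: one idea (just a sketch, not something I ran as part of the output above) is to forecast the six held-out months from each fit and score them against the test set with accuracy(). This assumes the wind_ets and wind_ar1 objects fit above and the raw test values in wind_test$mwhs.
# Sketch: score the ETS and ARIMA fits on the 6-month hold-out
ets_acc <- accuracy(forecast(wind_ets, h = 6), wind_test$mwhs)
arima_acc <- accuracy(forecast(wind_ar1, h = 6), wind_test$mwhs)
ets_acc["Test set", c("RMSE", "MAE", "MAPE")]
arima_acc["Test set", c("RMSE", "MAE", "MAPE")]
My thinking is that whichever model has the lower test-set RMSE/MAE would be the better candidate, rather than comparing AIC values across different model classes.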
# GARCH Model
wind_ts <- scale(wind_ts) # standardize the series before fitting the GARCH model
#wind_garch=garchFit(data = wind_ts, formula = garch(1,1), cond.dist = "QMLE", trace = FALSE)
#coef(wind_garch)
# summary(wind_garch)
# plot(wind_garch$residuals)
# wind_fc = predict(wind_garch, 6)
# plot(wind_fc$meanForecast, type = "l")
# GARCH spec: zero-mean, no ARMA terms; rugarch defaults the variance model to sGARCH(1,1)
spec <- ugarchspec(mean.model = list(armaOrder = c(0, 0), include.mean = FALSE))
wind_garch2 <- ugarchfit(spec, data = wind_ts)
show(wind_garch2)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(0,0,0)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## omega 0.040492 0.015998 2.53110 0.011371
## alpha1 0.901107 0.160480 5.61507 0.000000
## beta1 0.076058 0.125816 0.60452 0.545498
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## omega 0.040492 0.008722 4.6422 0.000003
## alpha1 0.901107 0.072187 12.4830 0.000000
## beta1 0.076058 0.072426 1.0502 0.293645
##
## LogLikelihood : -284.7811
##
## Information Criteria
## ------------------------------------
##
## Akaike 2.3302
## Bayes 2.3728
## Shibata 2.3299
## Hannan-Quinn 2.3474
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 197.2 0
## Lag[2*(p+q)+(p+q)-1][2] 289.2 0
## Lag[4*(p+q)+(p+q)-1][5] 539.3 0
## d.o.f=0
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.772 0.1831
## Lag[2*(p+q)+(p+q)-1][5] 4.832 0.1670
## Lag[4*(p+q)+(p+q)-1][9] 6.690 0.2265
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 2.572 0.500 2.000 0.10876
## ARCH Lag[5] 5.540 1.440 1.667 0.07697
## ARCH Lag[7] 5.857 2.315 1.543 0.15115
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 0.7322
## Individual Statistics:
## omega 0.1433
## alpha1 0.6814
## beta1 0.3951
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 0.846 1.01 1.35
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 2.881 0.004317 ***
## Negative Sign Bias 1.305 0.192966
## Positive Sign Bias 1.551 0.122327
## Joint Effect 10.237 0.016657 **
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 787.3 1.101e-154
## 2 30 805.2 9.398e-151
## 3 40 1181.8 1.636e-222
## 4 50 926.4 2.527e-162
##
##
## Elapsed time : 0.08595109
#plot(wind_garch2)
coef(wind_garch2)
## omega alpha1 beta1
## 0.04049156 0.90110732 0.07605847
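On the question I raised above about the “weights”: I think the condition to check is that alpha1 + beta1 is less than 1 (that keeps the variance process stationary); omega is the constant term and is not part of that sum, which is probably why my total came out above 1. A quick check using the fitted coefficients:
# Check GARCH(1,1) persistence: alpha1 + beta1 should be < 1 (omega is excluded)
garch_coefs <- coef(wind_garch2)
garch_coefs["alpha1"] + garch_coefs["beta1"] # about 0.977, so the condition holds
persistence(wind_garch2) # rugarch's built-in persistence measure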
summary(wind_garch2)
## Length Class Mode
## 1 uGARCHfit S4
#wind_fc <- predict(wind_garch2,1)
par(mfrow = c(3, 4))
for (i in 1:12) {
plot(wind_garch2, which = i)
}
##
## please wait...calculating quantiles...
#accuracy(wind_garch2)
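For forecasting from the rugarch fit, I believe ugarchforecast() is the replacement for the predict() call I commented out above. A sketch of what I think that would look like (on the scaled series), plus a plot of the in-sample conditional volatility:
# Sketch: 6-step-ahead forecast from the fitted GARCH model (series was scaled above)
wind_garch_fc <- ugarchforecast(wind_garch2, n.ahead = 6)
fitted(wind_garch_fc) # forecast of the series mean (zero-mean model, so essentially 0)
sigma(wind_garch_fc) # forecast of the conditional volatility
par(mfrow = c(1, 1)) # reset the plotting grid from the diagnostic loop above
plot(as.numeric(sigma(wind_garch2)), type = "l",
     main = "In-sample conditional volatility", ylab = "sigma")
Because the mean model here is a constant zero and the series was standardized, these forecasts are not directly comparable to the ETS/ARIMA forecasts in MWh; as far as I understand it, the GARCH fit is really describing the volatility rather than the level of the series.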
I definitely struggled with interpreting the GARCH model, but it seems like an interesting concept since it models the volatility rather than the level of the series. Please, if anyone has any critiques, I am all ears. A lot of you are much better coders and interpreters of these models, so I appreciate any feedback.