library(fpp3)
library(dplyr)
library(ggplot2)
library(readxl)
library(tsibble)
library(psych)
library(tidyr)
library(forecast)
This project consists of three parts, two required and one bonus, and is worth 15% of your grade. The project is due at 11:59 PM on Sunday, Apr 11. I will accept late submissions with a penalty until the meetup after that, when we review some projects.
In Part A, I want you to forecast how much cash is taken out of 4 different ATM machines for May 2010. The data is given in a single file. The variable ‘Cash’ is provided in hundreds of dollars; other than that it is straightforward. I am being somewhat ambiguous on purpose to give this a little more of a business feeling. Explain and demonstrate your process, techniques used and not used, and your actual forecast. I am giving you the data via an Excel file. Please provide your written report with your findings, visuals, discussion, and R code via an RPubs link, along with the actual .rmd file. Also please submit the forecast in an Excel-readable file.
Forecasting Power
Part B consists of a simple dataset of residential power usage from January 1998 through December 2013. Your assignment is to model these data and provide a monthly forecast for 2014. The data is given in a single file. The variable ‘KWH’ is power consumption in kilowatt hours; the rest is straightforward. Add this to your existing files above.
BONUS, optional (part or all)
Part C consists of two datasets. These are simple two-column sets; however, they have different time stamps. Your optional assignment is to time-base sequence the data and aggregate based on hour (a sketch of what this looks like follows); for multiple recordings within an hour, take the mean. Then determine whether the data is stationary and whether it can be forecast. If so, provide a week-forward forecast and present the results via RPubs and the .rmd file, with the forecast in an Excel-readable file.
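For reference on Part C, here is a minimal sketch of the hour-based aggregation, assuming a data frame raw with a POSIXct timestamp column ts and a numeric column value (both names hypothetical; floor_date comes from lubridate, which fpp3 loads):
# truncate each timestamp to its hour, then average multiple readings per hour
hourly <- raw %>%
  mutate(hour = floor_date(ts, unit = "hour")) %>%
  group_by(hour) %>%
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop")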
url <- "https://github.com/evanmclaughlin/DATA624/raw/main/ATM624Data.xlsx"
download.file(url, destfile = "ATM624Data.xlsx", mode = "wb")
atm <- read_excel("ATM624Data.xlsx")
head(atm)
## # A tibble: 6 × 3
## DATE ATM Cash
## <dbl> <chr> <dbl>
## 1 39934 ATM1 96
## 2 39934 ATM2 107
## 3 39935 ATM1 82
## 4 39935 ATM2 89
## 5 39936 ATM1 85
## 6 39936 ATM2 90
#let's fix the date first
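# Excel stores dates as day counts; origin 1899-12-30 is Excel's Windows epoch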
atm$DATE <- as.Date(atm$DATE, origin = "1899-12-30")
head(atm$DATE)
## [1] "2009-05-01" "2009-05-01" "2009-05-02" "2009-05-02" "2009-05-03"
## [6] "2009-05-03"
summary(atm)
## DATE ATM Cash
## Min. :2009-05-01 Length:1474 Min. : 0.0
## 1st Qu.:2009-08-01 Class :character 1st Qu.: 0.5
## Median :2009-11-01 Mode :character Median : 73.0
## Mean :2009-10-31 Mean : 155.6
## 3rd Qu.:2010-02-01 3rd Qu.: 114.0
## Max. :2010-05-14 Max. :10919.8
## NA's :19
#let's start by looking at the outputs
atm %>% ggplot(aes(x = DATE, y = Cash, col = ATM)) +
geom_line(show.legend = FALSE) +
facet_wrap(~ ATM, ncol = 1, scales = "free_y") +
labs(title = "Cash Withdrawals", x = "Date") +
scale_y_continuous("Withdrawals")
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_line()`).
Looks like we need to deal with the NAs as well as the outlier in ATM4. ATM3 doesn’t have much data except at the tail end of the series, so it might not be worth spending much time on it.
There was obviously an issue with the date column, so I converted the Excel serial numbers to dates. There is also the question of the decimal values in ‘Cash’ to keep in mind, and we need to decide how to handle the NAs. There are not many, so we have some latitude in how we address them, but we should be mindful of how they cluster around dates. Let’s next take a look at the cash withdrawal data.
#look at withdrawal data by ATM as tibble
atm_df <- atm %>%
filter(DATE < "2010-05-01") %>%
pivot_wider(names_from = ATM, values_from = Cash)
# Convert to a tsibble
atm_df <- atm_df %>%
as_tsibble(index = DATE)
# Display the first few rows
head(atm_df)
## # A tsibble: 6 x 5 [1D]
## DATE ATM1 ATM2 ATM3 ATM4
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2009-05-01 96 107 0 777.
## 2 2009-05-02 82 89 0 524.
## 3 2009-05-03 85 90 0 793.
## 4 2009-05-04 90 55 0 908.
## 5 2009-05-05 99 79 0 52.8
## 6 2009-05-06 88 19 0 52.2
#which records include NAs
atm_df[!complete.cases(atm_df), ]
## # A tsibble: 5 x 5 [1D]
## DATE ATM1 ATM2 ATM3 ATM4
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2009-06-13 NA 91 0 746.
## 2 2009-06-16 NA 82 0 373.
## 3 2009-06-18 21 NA 0 92.5
## 4 2009-06-22 NA 90 0 80.6
## 5 2009-06-24 66 NA 0 90.6
All our NAs are in ATMs 1 and 2. Let’s replace each missing value with the median of its ATM’s series.
# replace with median and then check if there are any left
median_value <- median(atm_df[["ATM1"]], na.rm = TRUE)
atm_df[["ATM1"]][is.na(atm_df[["ATM1"]])] <- median_value
median_value <- median(atm_df[["ATM2"]], na.rm = TRUE)
atm_df[["ATM2"]][is.na(atm_df[["ATM2"]])] <- median_value
atm_df[!complete.cases(atm_df), ]
## # A tsibble: 0 x 5 [?]
## # ℹ 5 variables: DATE <date>, ATM1 <dbl>, ATM2 <dbl>, ATM3 <dbl>, ATM4 <dbl>
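For reference, the same imputation can be written in one dplyr step (an equivalent sketch using tidyr’s replace_na, which is already loaded):
# median-impute both columns at once
atm_df <- atm_df %>%
  mutate(across(c(ATM1, ATM2), ~ replace_na(.x, median(.x, na.rm = TRUE))))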
Now that we have cleaned our data, let’s begin building our models. We’ll model each ATM, starting with an STL decomposition, since we’d benefit from imposing a weekly window and assessing seasonality.
atm1_df <- atm_df %>%
dplyr::select(DATE, ATM1)
atm1_df %>%
model(
STL(ATM1 ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
Let’s assess the need for differencing next.
ndiffs(atm1_df$ATM1)
## [1] 0
atm1_df %>%
ACF(ATM1, lag_max = 40) %>%
autoplot()
The ACF shows significant spikes at multiples of lag 7, pointing to strong weekly seasonality, and the slow decay across those seasonal lags tells us the series is far from white noise. Less intuitively, ndiffs() returns 0, so an ordinary first difference is not indicated. I’ll move forward with ARIMA to investigate further.
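Because the spikes sit at weekly lags, the more relevant unit-root question is seasonal. A quick check with feasts, assuming the weekly period is inferred from the daily index:
# is a seasonal (lag-7) difference suggested?
atm1_df %>%
  features(ATM1, unitroot_nsdiffs)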
I’ll rely on automatic model selection to pick the best candidates. Since we are ultimately forecasting May 2010, I’ll train on data through April 1 and hold out the rest of April as a test set.
# filter and train
train_atm1 <- atm1_df %>%
filter(DATE <= "2010-04-01")
atm1_fit <- train_atm1 %>%
model(
SNAIVE = SNAIVE(ATM1),
ETS = ETS(ATM1),
ARIMA = ARIMA(ATM1),
`Auto ARIMA` = ARIMA(ATM1, stepwise = FALSE, approx = FALSE)
)
forecast_atm1 <- atm1_fit %>%
forecast(h = 30)
forecast_atm1 %>%
autoplot(atm1_df, level = NULL)+
facet_wrap( ~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast"))+
labs(title= "April - ATM1") +
xlab("Date") +
ylab("$")
accuracy(forecast_atm1, atm1_df) %>%
select(.model, RMSE:MAPE)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2010-05-01
## # A tibble: 4 × 5
## .model RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA 12.6 10.0 -85.8 88.9
## 2 Auto ARIMA 13.0 10.1 -98.9 102.
## 3 ETS 12.1 9.55 -64.5 67.8
## 4 SNAIVE 16.8 14.5 -69.5 76.6
ETS posts the lowest RMSE and MAE, so it has the edge.
# remade the model from source
ets_atm1_fit <- atm1_df %>%
model(ETS = ETS(ATM1))
ets_forecast_atm1 <- ets_atm1_fit %>%
forecast(h=30)
ets_forecast_atm1 %>%
autoplot(atm1_df) +
labs(title = "ETS: ATM1 - May",
y = "$")
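Before moving on, it’s worth a quick residual check on the refit model (a diagnostic sketch; gg_tsresiduals and ljung_box come with the fpp3 packages):
# residual plot, ACF, and histogram for the chosen ETS model
ets_atm1_fit %>%
  gg_tsresiduals()
# portmanteau test on the innovations (lag 14 covers two weekly cycles)
augment(ets_atm1_fit) %>%
  features(.innov, ljung_box, lag = 14)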
atm2_df <- atm_df %>%
dplyr::select(DATE, ATM2)
atm2_df %>%
model(
STL(ATM2 ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
ndiffs(atm2_df$ATM2)
## [1] 1
atm2_df %>%
ACF(ATM2, lag_max = 40) %>%
autoplot()
The same analysis for ATM2 gives essentially the same picture, except that ndiffs() returns 1, so we need to apply a first difference before assessing further.
atm2_df <- atm2_df %>%
mutate(diff_ATM2= difference(ATM2))
We will fit models on the differenced series as well as the original.
train_atm2 <- atm2_df %>%
filter(DATE <= "2010-04-01")
#non-differenced
nd_fit_atm2 <- train_atm2 %>%
model(
SNAIVE = SNAIVE(ATM2),
ETS = ETS(ATM2),
)
#differenced
diff_fit_atm2 <- train_atm2 %>%
slice(2:330) %>%
model(
ETS_diff = ETS(diff_ATM2),
ARIMA = ARIMA(diff_ATM2),
`Auto ARIMA` = ARIMA(diff_ATM2, stepwise = FALSE, approx = FALSE)
)
#forecasts
forecast_nd_atm2 <- nd_fit_atm2 %>%
forecast(h = 30)
diff_forecast_atm2 <- diff_fit_atm2 %>%
forecast(h = 30)
#visuals
forecast_nd_atm2 %>%
autoplot(atm2_df, level = NULL)+
facet_wrap( ~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast non-diff"))+
labs(title= "ATM2: April Forecast") +
xlab("Date") +
ylab("$")
diff_forecast_atm2 %>%
autoplot(atm2_df, level = NULL)+
facet_wrap( ~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast differenced"))+
labs(title= "ATM2: April Forecast") +
xlab("Date") +
ylab("$")
It’s hard to glean much from these plots alone, so we’ll again turn to the accuracy scores.
accuracy(forecast_nd_atm2, atm2_df) %>%
select(.model, RMSE:MAPE)
## # A tibble: 2 × 5
## .model RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ETS 19.0 13.7 -29.3 59.4
## 2 SNAIVE 26.0 16.9 32.3 45.6
accuracy(diff_forecast_atm2, atm2_df) %>%
select(.model, RMSE:MAPE)
## # A tibble: 3 × 5
## .model RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA 26.2 20.3 80.7 115.
## 2 Auto ARIMA 25.2 19.2 73.2 96.2
## 3 ETS_diff 26.9 20.8 76.8 121.
The non-differenced ETS posts lower values across all scores (keeping in mind that the differenced models are scored on the change series, so the comparison is indicative rather than exact). We’ll opt for this model for our forecast.
ETS_fit_atm2 <- atm2_df %>%
model(
ETS = ETS(ATM2))
forecast_ETS_atm2 <- ETS_fit_atm2 %>%
forecast(h=30)
forecast_ETS_atm2 %>%
autoplot(atm2_df) +
labs(title = "ETS Forecast: ATM2 - May",
y = "$")
# ATM3
ATM3 records zero withdrawals for nearly the entire series, with actual activity only in its final few days. This reflects either poor data quality or an ATM that only recently came online; either way, there is too little usable history to model, so I will not be assessing it further.
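If the deliverable still needs a May figure for ATM3, about all the data supports is carrying forward the last observed value (a sketch, not a modeling claim):
# naive carry-forward from ATM3's few non-zero days
atm_df %>%
  select(DATE, ATM3) %>%
  model(NAIVE = NAIVE(ATM3)) %>%
  forecast(h = 31)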
We’ll move forward with the STL decomposition for ATM4 as well. But first, it’s worth keeping in mind the outlier, which is clearly skewing the results and represents either a data quality issue or, at the very least, a non-representative aberration.
atm4_df <- atm_df %>%
select(DATE, ATM4)
atm4_df %>%
model(
STL(ATM4 ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
The variance justifies a Box-Cox transformation before proceeding with the forecasting exercise.
# Box-Cox
(lambda_atm4 <- atm4_df %>%
features(ATM4, features = guerrero) %>%
pull(lambda_guerrero))
## [1] -0.0737252
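A lambda this close to zero means the transformation behaves much like a log.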
trans_atm4 <- BoxCox(atm4_df$ATM4, lambda = lambda_atm4)
atm4_df$ATM4_T<-trans_atm4
#now let's look at the data we extracted
atm4_df%>%
autoplot(ATM4_T)
ndiffs(atm4_df$ATM4)
## [1] 0
ndiffs(atm4_df$ATM4_T)
## [1] 0
atm4_df %>%
ACF(ATM4_T, lag_max = 28) %>%
autoplot()
ndiffs() indicates that an ordinary difference is unnecessary, but the ACF above shows repeated spikes at weekly lags, so I am going to pursue differencing on the transformed series as well.
atm4_df <- atm4_df %>%
mutate(diff_ATM4= difference(ATM4_T))
train_atm4 <- atm4_df %>%
filter(DATE <= "2010-04-01")
#first model non-differenced
nd_fit_atm4 <- train_atm4 %>%
model(
SNAIVE = SNAIVE(ATM4_T),
ETS = ETS(ATM4_T),
)
#differenced model
diff_fit_atm4 <- train_atm4 %>%
slice(2:340) %>%
model(
ETS_diff = ETS(diff_ATM4),
ARIMA = ARIMA(diff_ATM4),
`Auto ARIMA` = ARIMA(diff_ATM4, stepwise = FALSE, approx = FALSE)
)
#forecasts for both diff and non-diff
forecast_nd_atm4 <- nd_fit_atm4 %>%
forecast(h = 30)
forecast_diff_atm4 <- diff_fit_atm4 %>%
forecast(h = 30)
#visuals
forecast_nd_atm4 %>%
autoplot(atm4_df, level = NULL)+
facet_wrap( ~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast"))+
labs(title= "Non-Diff Forecasts: ATM4 - April") +
xlab("Date") +
ylab("$")
forecast_diff_atm4 %>%
autoplot(atm4_df, level = NULL)+
facet_wrap( ~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast"))+
labs(title= "Diff Forecasts: ATM4 - April") +
xlab("Date") +
ylab("$")
Let’s take a look at the accuracy reports next.
accuracy(forecast_nd_atm4, atm4_df) %>%
select(.model, RMSE:MAPE)
## # A tibble: 2 × 5
## .model RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ETS 1.07 0.740 -22.3 32.8
## 2 SNAIVE 0.861 0.510 -19.5 22.7
accuracy(forecast_diff_atm4, atm4_df) %>%
select(.model, RMSE:MAPE)
## # A tibble: 3 × 5
## .model RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA 1.64 1.26 104. 104.
## 2 Auto ARIMA 1.66 1.26 103. 103.
## 3 ETS_diff 1.73 1.33 21.7 184.
These numbers show that the seasonal naive model on the non-differenced (transformed) series posts the lowest error scores, so we’ll move forward with it.
snaive_fit_atm4 <- atm4_df %>%
model(
SNAIVE = SNAIVE(ATM4_T))
forecast_snaive_atm4 <- snaive_fit_atm4 %>%
forecast(h=30)
#visuals
forecast_snaive_atm4 %>%
autoplot(atm4_df) +
labs(title = "Forecast: ATM4 - May 2010",
y = "$")
(atm4_forecast_results <-
as.data.frame(forecast_snaive_atm4) %>%
select(DATE, .mean) %>%
rename(Date = DATE, Cash = .mean)%>%
mutate(Cash=round(Cash,2)))
## Date Cash
## 1 2010-05-01 1.59
## 2 2010-05-02 5.04
## 3 2010-05-03 4.85
## 4 2010-05-04 2.38
## 5 2010-05-05 4.75
## 6 2010-05-06 3.31
## 7 2010-05-07 4.96
## 8 2010-05-08 1.59
## 9 2010-05-09 5.04
## 10 2010-05-10 4.85
## 11 2010-05-11 2.38
## 12 2010-05-12 4.75
## 13 2010-05-13 3.31
## 14 2010-05-14 4.96
## 15 2010-05-15 1.59
## 16 2010-05-16 5.04
## 17 2010-05-17 4.85
## 18 2010-05-18 2.38
## 19 2010-05-19 4.75
## 20 2010-05-20 3.31
## 21 2010-05-21 4.96
## 22 2010-05-22 1.59
## 23 2010-05-23 5.04
## 24 2010-05-24 4.85
## 25 2010-05-25 2.38
## 26 2010-05-26 4.75
## 27 2010-05-27 3.31
## 28 2010-05-28 4.96
## 29 2010-05-29 1.59
## 30 2010-05-30 5.04
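One caution before exporting: SNAIVE was fit on ATM4_T, so the values above are on the Box-Cox scale, not hundreds of dollars. A sketch of the back-transformation using fabletools’ inv_box_cox (applied to the point forecasts only; a true mean forecast would also need a bias adjustment):
# invert the Box-Cox transform to return to the original cash scale
atm4_forecast_results <- atm4_forecast_results %>%
  mutate(Cash = round(inv_box_cox(Cash, lambda = lambda_atm4), 2))
Alternatively, specifying the model as SNAIVE(box_cox(ATM4, lambda_atm4)) would let fable back-transform the forecasts automatically.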
# export the May point forecasts for each ATM as Excel-readable CSVs
write.csv(as.data.frame(ets_forecast_atm1)[, c("DATE", ".mean")],
          "forecast_atm1.csv", row.names = FALSE)
write.csv(as.data.frame(forecast_ETS_atm2)[, c("DATE", ".mean")],
          "forecast_atm2.csv", row.names = FALSE)
write.csv(atm4_forecast_results, "forecast_atm4.csv", row.names = FALSE)
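If a single workbook is preferred for the deliverable, the three tables could also be bundled with the writexl package (an optional sketch; writexl is not loaded above and would need to be installed):
# one Excel workbook with a sheet per ATM
writexl::write_xlsx(
  list(ATM1 = as.data.frame(ets_forecast_atm1)[, c("DATE", ".mean")],
       ATM2 = as.data.frame(forecast_ETS_atm2)[, c("DATE", ".mean")],
       ATM4 = atm4_forecast_results),
  "atm_forecasts.xlsx")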
# Data Importing and Cleaning
# Load and prepare data
rcfl <- read_excel("ResidentialCustomerForecastLoad-624.xlsx")
rcfl <- rcfl %>%
rename(date = `YYYY-MMM`) %>%
mutate(date = yearmonth(date), KWH = KWH / 1000) %>%
select(-CaseSequence) %>%
as_tsibble(index = date)
Let’s take a look at the missing values.
which(is.na(rcfl), arr.ind=TRUE)
## row col
## [1,] 129 2
We’ve located the missing value at 2008 Sep. I’ve converted and renamed the date column to ‘date’ and removed CaseSequence, since it adds nothing to our analysis. Now we can convert to a monthly time series and get a quick seasonal visual of what we’re working with.
rcfl_2 <-ts(rcfl[, "KWH"], start = c(1998, 1), frequency = 12)
ggseasonplot(rcfl_2)+ggtitle('Residential power usage: by year')
#head(rcfl_2)
Since the data is clearly seasonal, we can justify replacing this obvious July outlier with the median usage for July. I will interpolate the missing value.
# Convert to a time series object
rcfl_ts <- ts(rcfl$KWH, start = c(1998, 1), frequency = 12)
# Interpolate missing values
rcfl_ts <- na.interp(rcfl_ts)
#establish outlier threshold and replace july outlier
july_usage <- rcfl_ts[cycle(rcfl_ts) == 7]
lower_bound <- quantile(july_usage, 0.1)
median_july <- median(july_usage[july_usage > lower_bound])
rcfl_ts[cycle(rcfl_ts) == 7 & rcfl_ts < lower_bound] <- median_july
# Plot the updated data
ggseasonplot(rcfl_ts) +
ggtitle("Residential Power Usage: By Year")
We seem to have successfully dealt with our outlier. Next I’ll transform the data using Box-Cox.
#rcfl_ts
#calculate lambda
rcfl_lambda <- BoxCox.lambda(rcfl_ts)
rcfl_lambda
## [1] -0.1949435
#apply BC and display transformed data
rcfl_ts_transformed <- BoxCox(rcfl_ts, rcfl_lambda)
autoplot(rcfl_ts_transformed) +
ggtitle("Box-Cox Transformed Residential Power Usage") +
xlab("Time") +
ylab("Transformed KWH")
The Box-Cox transformation has stabilized the variance, and the seasonal patterns are consistent over time.
We’ll employ decomposition again to help assess seasonality and determine if differencing is necessary.
rcfl_decomp <- stl(rcfl_ts_transformed, s.window = "periodic")
autoplot(rcfl_decomp) +
ggtitle("STL Decomposition")
# differencing check and ACF check
ndiffs(rcfl_ts_transformed)
## [1] 1
autoplot(acf(rcfl_ts_transformed, plot = FALSE)) +
ggtitle("ACF")
Differencing is necessary for our exercise.
rcfl_ts_differenced <- diff(rcfl_ts_transformed, differences = 1)
autoplot(rcfl_ts_differenced) +
ggtitle("Differenced power usage") +
ylab("KWH: Diff")
# Models
# We'll proceed with an ARIMA model on the differenced data, but the differenced
# KWH needs to be calculated separately so the columns line up correctly
rcfl <- read_excel("ResidentialCustomerForecastLoad-624.xlsx")
rcfl <- rcfl %>%
rename(date = `YYYY-MMM`) %>%
mutate(date = yearmonth(date), KWH = KWH / 1000) %>%
select(-CaseSequence) %>%
as_tsibble(index = date)
diff_KWH <- diff(rcfl$KWH)
# we need to deal with our differenced NA value
median_diff_KWH <- median(diff_KWH, na.rm = TRUE)
diff_KWH <- c(median_diff_KWH, diff_KWH)
rcfl_differenced <- rcfl %>%
mutate(diff_KWH = diff_KWH)
#let's take a look
head(rcfl_differenced)
## # A tsibble: 6 x 3 [1M]
## date KWH diff_KWH
## <mth> <dbl> <dbl>
## 1 1998 Jan 6863. -69.9
## 2 1998 Feb 5838. -1024.
## 3 1998 Mar 5421. -418.
## 4 1998 Apr 5010. -410.
## 5 1998 May 4665. -345.
## 6 1998 Jun 6467. 1802.
Now the columns are the same length, and we can fit ARIMA models on the differenced data.
# split differenced data
rcfl_train_diff <- rcfl_differenced %>%
filter(year(date) < 2013)
#differenced models
rcfl_fit_diff <- rcfl_train_diff %>%
model(
ARIMA = ARIMA(diff_KWH),
`Auto ARIMA` = ARIMA(diff_KWH, stepwise = FALSE, approx = FALSE)
)
#forecast 2013 data
rcfl_forecast_diff <- rcfl_fit_diff %>%
forecast(h = "1 year")
#visualize
rcfl_forecast_diff %>%
autoplot(rcfl_train_diff, level = NULL) +
facet_wrap(~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast")) +
labs(title = "Differenced 2013 forecast: KWH") +
xlab("Month") +
ylab("KWH")
#non-differenced ARIMA
#split
rcfl_train_nd <- rcfl %>%
filter(year(date) < 2013)
#non-diff fit
rcfl_fit_nd <- rcfl_train_nd %>%
model(
ARIMA = ARIMA(KWH),
`Auto ARIMA` = ARIMA(KWH, stepwise = FALSE, approx = FALSE)
)
rcfl_forecast_nd <- rcfl_fit_nd %>%
forecast(h = "1 year")
rcfl_forecast_nd %>%
autoplot(rcfl_train_nd, level = NULL) +
facet_wrap(~ .model, scales = "free_y") +
guides(colour = guide_legend(title = "Forecast")) +
labs(title = "Non-Differenced 2013 forecast: KWH") +
xlab("Month") +
ylab("KWH")
Let’s take a look at the accuracy reports.
#differenced first
accuracy(rcfl_forecast_diff, rcfl_differenced) %>%
select(.model, RMSE:MAE)
## # A tibble: 2 × 3
## .model RMSE MAE
## <chr> <dbl> <dbl>
## 1 ARIMA 1971. 1384.
## 2 Auto ARIMA 1782. 1302.
#non-diff RMSE, MAE
accuracy(rcfl_forecast_nd, rcfl) %>%
select(.model, RMSE:MAE)
## # A tibble: 2 × 3
## .model RMSE MAE
## <chr> <dbl> <dbl>
## 1 ARIMA 1499. 972.
## 2 Auto ARIMA 1536. 1004.
The non-differenced ARIMA is the better model going by the RMSE and MAE values (again with the caveat that the differenced models are scored on the change series). Now let’s re-create the model with the full dataset.
rcfl_ARIMA_fit <- rcfl %>%
model(`Additive ARIMA` = ARIMA(KWH))
rcfl_ARIMA_forecast <- rcfl_ARIMA_fit %>%
forecast(h=12)
#visualize
rcfl_ARIMA_forecast %>%
autoplot(rcfl) +
labs(title = "Non-Differenced ARIMA: Monthly Residential KWH",
y = "KWH")
#publish forecast results
(rcfl_forecast_results <-
as.data.frame(rcfl_ARIMA_forecast) %>%
select(date, .mean) %>%
rename('KWH Forecast' = .mean))
## date KWH Forecast
## 1 2014 Jan 9790.458
## 2 2014 Feb 8624.753
## 3 2014 Mar 6623.643
## 4 2014 Apr 5974.459
## 5 2014 May 5935.072
## 6 2014 Jun 8199.158
## 7 2014 Jul 9498.803
## 8 2014 Aug 9999.104
## 9 2014 Sep 8456.634
## 10 2014 Oct 5860.011
## 11 2014 Nov 6143.003
## 12 2014 Dec 8245.603