Instructions

This project consists of 3 parts - two required and one bonus and is worth 15% of your grade. The project is due at 11:59 PM on Sunday Oct 25. I will accept late submissions with a penalty until the meetup after that when we review some projects.

Part A – ATM Forecast, ATM624Data.xlsx

In part A, I want you to forecast how much cash is taken out of 4 different ATM machines for May 2010.

The data is given in a single file. The variable ‘Cash’ is provided in hundreds of dollars, other than that it is straight forward. I am being somewhat ambiguous on purpose to make this have a little more business feeling. Explain and demonstrate your process, techniques used and not used, and your actual forecast. I am giving you data via an excel file, please provide your written report on your findings, visuals, discussion and your R code via an RPubs link along with the actual.rmd file. Also please submit the forecast which you will put in an Excel readable file.

  1. Data Importing and Cleaning
  1. Modeling
  1. Forecasting
file_path <- "/Users/benson/Desktop/DATA 624/DATA624 Project1/ATM624Data.xlsx"
ATM_df <- read_excel(file_path, sheet = "ATM Data")

### Cleaning ###
ATM_df$DATE <- as.Date(ATM_df$DATE, origin = "1899-12-30")

ATM_df <- ATM_df[!(is.na(ATM_df$Cash) | ATM_df$Cash == ""), ]

full_dates <- seq(min(ATM_df$DATE), max(ATM_df$DATE), by = "day")

ATM_df_complete <- ATM_df %>%
  tidyr::complete(DATE = full_dates, ATM, fill = list(Cash = 0))  

ATM_df_complete <- ATM_df_complete %>%
  mutate(YearMonth = format(DATE, "%Y-%m"))

ATM_df_monthly <- ATM_df_complete %>%
  group_by(ATM, YearMonth) %>%
  summarise(Total_Cash = sum(Cash, na.rm = TRUE), .groups = "drop")

ATM_df_monthly <- ATM_df_monthly %>%
  mutate(YearMonth = yearmonth(YearMonth)) %>%
  as_tsibble(index = YearMonth, key = ATM)

### ATMs Plot ###
ATM_df_monthly %>%
  filter(ATM %in% c("ATM1", "ATM2", "ATM3", "ATM4")) %>%
  ggplot(aes(x = YearMonth, y = Total_Cash, color = ATM)) +
  geom_line() +
  labs(title = "Monthly Cash Withdrawals by ATM", x = "Year-Month", y = "Total Cash (Hundreds)") +
  theme_minimal()

### Seasonal ?? ###
ATM_df_monthly %>%
  filter(ATM %in% c("ATM1", "ATM2", "ATM3", "ATM4")) %>%
  gg_season(Total_Cash, period = "year") +
  labs(title = "Seasonal Patterns in Cash Withdrawals by ATM")

#### ATM1 ###
ATM1_data <- ATM_df_monthly %>% filter(ATM == "ATM1")

fit_arima_atm1 <- ATM1_data %>%
model(ARIMA(Total_Cash ~ pdq(0,0,1) + 1))
report(fit_arima_atm1)
## Series: Total_Cash 
## Model: ARIMA(0,0,1) w/ mean 
## 
## Coefficients:
##          ma1   constant
##       0.7454  2515.7939
## s.e.  0.2610    94.0972
## 
## sigma^2 estimated as 44566:  log likelihood=-80.57
## AIC=167.13   AICc=170.13   BIC=168.59
fit_arima_atm1 %>%
  gg_tsresiduals() +
  labs(title = "Residual Diagnostics for ATM1 (ARIMA Model)")

forecast_atm1 <- fit_arima_atm1 %>%
  forecast(h = 1)

autoplot(ATM1_data, Total_Cash) +
  autolayer(forecast_atm1, level =95) +
  labs(
    title = "ATM1: Historical Cash Withdrawals and Forecast for May 2010 (ARIMA Model(0,0,1))",
    x = "Year-Month",
    y = "Total Cash Withdrawn"
  ) +
  theme_minimal()

### ATM2 ###
ATM2_data <- ATM_df_monthly %>%  filter(ATM == "ATM2")

fit_ets_atm2 <- ATM2_data %>%
model(ETS(Total_Cash))
report(fit_ets_atm2)
## Series: Total_Cash 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.6855545 
## 
##   Initial states:
##      l[0]
##  2269.309
## 
##   sigma^2:  37053.51
## 
##      AIC     AICc      BIC 
## 159.8724 162.8724 161.3272
fit_ets_atm2 %>%
  gg_tsresiduals() +
  labs(title = "Residual Diagnostics for ATM2 (ETS Model)")

forecast_atm2 <- fit_ets_atm2 %>%
  forecast(h = 1)

autoplot(ATM2_data, Total_Cash) +
  autolayer(forecast_atm2, level =95) +
  labs(
    title = "ATM2: Historical Cash Withdrawals and Forecast for May 2010 (ETS Model)",
    x = "Year-Month",
    y = "Total Cash Withdrawn"
  ) +
  theme_minimal()

### SKIP ATM3 cause insufficient data ###

### ATM4 ###
ATM4_data <- ATM_df_monthly %>%  filter(ATM == "ATM4")

fit_arima_diff_atm4 <- ATM4_data %>%
  model(ARIMA_Diff = ARIMA(Total_Cash ~ pdq(1,1,0)))

report(fit_arima_diff_atm4)
## Series: Total_Cash 
## Model: ARIMA(1,1,0) 
## 
## Coefficients:
##           ar1
##       -0.3213
## s.e.   0.2704
## 
## sigma^2 estimated as 22386323:  log likelihood=-108.22
## AIC=220.44   AICc=221.94   BIC=221.24
fit_arima_diff_atm4 %>%
  gg_tsresiduals() +
  labs(title = "Residual Diagnostics for ATM4 (ARIMA Model)")

forecast_atm4 <- fit_arima_diff_atm4 %>%
  forecast(h = 1)

autoplot(ATM4_data, Total_Cash) +
  autolayer(forecast_atm4, level =95) +
  labs(
    title = "ATM4: Historical Cash Withdrawals and Forecast for May 2010 (ARIMA(1,1,0))",
    x = "Year-Month",
    y = "Total Cash Withdrawn"
  ) +
  theme_minimal()

## ATM4 ETS
fit_ets_atm4 <- ATM4_data %>%
model(ETS(Total_Cash ~trend()))
report(fit_ets_atm4)
## Series: Total_Cash 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.0001001357 
## 
##   Initial states:
##      l[0]
##  14417.57
## 
##   sigma^2:  12842979
## 
##      AIC     AICc      BIC 
## 230.0507 233.0507 231.5054
fit_ets_atm4 %>%
  gg_tsresiduals() +
  labs(title = "Residual Diagnostics for ATM4  (ETS ~ trend Model)")

forecast_ets_atm4 <- fit_ets_atm4 %>%
  forecast(h = 1)

autoplot(ATM4_data, Total_Cash) +
  autolayer(forecast_ets_atm4, level =95) +
  labs(
    title = "ATM4: Historical Cash Withdrawals and Forecast for May 2010 (ETS ~ trend Model)",
    x = "Year-Month",
    y = "Total Cash Withdrawn"
  ) +
  theme_minimal()

#Export Excel##
forecasts <- bind_rows(
  forecast_atm1 %>% as_tibble() %>% mutate(ATM = "ATM1"),
  forecast_atm2 %>% as_tibble() %>% mutate(ATM = "ATM2"),
  forecast_ets_atm4 %>% as_tibble() %>% mutate(ATM = "ATM4")
)

write_xlsx(forecasts, "ATM_Forecasts_May2010.xlsx")

Part B – Forecasting Power, ResidentialCustomerForecastLoad-624.xlsx

Part B consists of a simple dataset of residential power usage for January 1998 until December 2013. Your assignment is to model these data and a monthly forecast for 2014. The data is given in a single file. The variable ‘KWH’ is power consumption in Kilowatt hours, the rest is straight forward. Add this to your existing files above.

I did not process part A well while looking for the best model. (I felt messy and unstructured) *** In part B, I use auto ARIMA, which can easily compare the best-fit ARIMA model, which helps a lot in finding the best-fit model.

  1. Data Importing and Cleaning
  1. Modeling
  1. Forecasting
file_b_path <- "/Users/benson/Desktop/DATA 624/DATA624 Project1/ResidentialCustomerForecastLoad-624.xlsx"

kwh_df <- read_excel(file_b_path, sheet = "ResidentialCustomerForecastLoad") %>%
  rename(DATE = `YYYY-MMM`) 

### Cleaning ###
#print(sum(is.na(kwh_df$KWH)))
kwh_df$DATE <- as.Date(paste0(kwh_df$DATE, "-01"), format = "%Y-%b-%d")
kwh_df$KWH <- na.approx(kwh_df$KWH, na.rm = FALSE)
head(kwh_df)
## # A tibble: 6 × 3
##   CaseSequence DATE           KWH
##          <dbl> <date>       <dbl>
## 1          733 1998-01-01 6862583
## 2          734 1998-02-01 5838198
## 3          735 1998-03-01 5420658
## 4          736 1998-04-01 5010364
## 5          737 1998-05-01 4665377
## 6          738 1998-06-01 6467147
kwh_ts <- ts(kwh_df$KWH, start = c(1998, 1), frequency = 12)

plot(kwh_ts, main = "kwh_ts", ylab = "KWH", xlab = "Date") +
  theme_minimal()

## NULL
### ARIMA ###
train_ts <- window(kwh_ts, end = c(2011, 12))  
test_ts <- window(kwh_ts, start = c(2012, 1))  

auto_arima_model <- auto.arima(train_ts, 
                              seasonal = TRUE, 
                              stepwise = FALSE,  
                              approximation = FALSE, 
                              trace = TRUE)     
## 
##  ARIMA(0,0,0)(0,1,0)[12]                    : 4814.924
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 4815.585
##  ARIMA(0,0,0)(0,1,1)[12]                    : 4742.436
##  ARIMA(0,0,0)(0,1,1)[12] with drift         : 4735.749
##  ARIMA(0,0,0)(0,1,2)[12]                    : 4727.147
##  ARIMA(0,0,0)(0,1,2)[12] with drift         : 4723.273
##  ARIMA(0,0,0)(1,1,0)[12]                    : 4748.628
##  ARIMA(0,0,0)(1,1,0)[12] with drift         : 4746.914
##  ARIMA(0,0,0)(1,1,1)[12]                    : 4730.278
##  ARIMA(0,0,0)(1,1,1)[12] with drift         : 4724.784
##  ARIMA(0,0,0)(1,1,2)[12]                    : 4727.185
##  ARIMA(0,0,0)(1,1,2)[12] with drift         : 4723.38
##  ARIMA(0,0,0)(2,1,0)[12]                    : 4720.689
##  ARIMA(0,0,0)(2,1,0)[12] with drift         : 4715.75
##  ARIMA(0,0,0)(2,1,1)[12]                    : 4722.789
##  ARIMA(0,0,0)(2,1,1)[12] with drift         : 4717.748
##  ARIMA(0,0,0)(2,1,2)[12]                    : 4723.152
##  ARIMA(0,0,0)(2,1,2)[12] with drift         : 4718.814
##  ARIMA(0,0,1)(0,1,0)[12]                    : 4807.532
##  ARIMA(0,0,1)(0,1,0)[12] with drift         : 4808.587
##  ARIMA(0,0,1)(0,1,1)[12]                    : 4732.61
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : 4728.181
##  ARIMA(0,0,1)(0,1,2)[12]                    : 4722.231
##  ARIMA(0,0,1)(0,1,2)[12] with drift         : 4719.64
##  ARIMA(0,0,1)(1,1,0)[12]                    : 4741.492
##  ARIMA(0,0,1)(1,1,0)[12] with drift         : 4740.992
##  ARIMA(0,0,1)(1,1,1)[12]                    : 4723.947
##  ARIMA(0,0,1)(1,1,1)[12] with drift         : 4720.522
##  ARIMA(0,0,1)(1,1,2)[12]                    : 4723.409
##  ARIMA(0,0,1)(1,1,2)[12] with drift         : 4720.818
##  ARIMA(0,0,1)(2,1,0)[12]                    : 4717.77
##  ARIMA(0,0,1)(2,1,0)[12] with drift         : 4714.621
##  ARIMA(0,0,1)(2,1,1)[12]                    : 4719.699
##  ARIMA(0,0,1)(2,1,1)[12] with drift         : 4716.385
##  ARIMA(0,0,1)(2,1,2)[12]                    : 4720.713
##  ARIMA(0,0,1)(2,1,2)[12] with drift         : 4717.866
##  ARIMA(0,0,2)(0,1,0)[12]                    : 4808.475
##  ARIMA(0,0,2)(0,1,0)[12] with drift         : 4809.664
##  ARIMA(0,0,2)(0,1,1)[12]                    : 4733.641
##  ARIMA(0,0,2)(0,1,1)[12] with drift         : 4729.705
##  ARIMA(0,0,2)(0,1,2)[12]                    : 4724.297
##  ARIMA(0,0,2)(0,1,2)[12] with drift         : 4721.791
##  ARIMA(0,0,2)(1,1,0)[12]                    : 4743.314
##  ARIMA(0,0,2)(1,1,0)[12] with drift         : 4742.613
##  ARIMA(0,0,2)(1,1,1)[12]                    : 4726.014
##  ARIMA(0,0,2)(1,1,1)[12] with drift         : 4722.684
##  ARIMA(0,0,2)(1,1,2)[12]                    : 4725.563
##  ARIMA(0,0,2)(1,1,2)[12] with drift         : 4723.004
##  ARIMA(0,0,2)(2,1,0)[12]                    : 4719.573
##  ARIMA(0,0,2)(2,1,0)[12] with drift         : 4716.683
##  ARIMA(0,0,2)(2,1,1)[12]                    : 4721.488
##  ARIMA(0,0,2)(2,1,1)[12] with drift         : 4718.449
##  ARIMA(0,0,3)(0,1,0)[12]                    : 4810.044
##  ARIMA(0,0,3)(0,1,0)[12] with drift         : 4811.338
##  ARIMA(0,0,3)(0,1,1)[12]                    : 4733.328
##  ARIMA(0,0,3)(0,1,1)[12] with drift         : 4730.194
##  ARIMA(0,0,3)(0,1,2)[12]                    : 4722.24
##  ARIMA(0,0,3)(0,1,2)[12] with drift         : 4720.635
##  ARIMA(0,0,3)(1,1,0)[12]                    : 4743.342
##  ARIMA(0,0,3)(1,1,0)[12] with drift         : 4743.106
##  ARIMA(0,0,3)(1,1,1)[12]                    : 4724.595
##  ARIMA(0,0,3)(1,1,1)[12] with drift         : 4722.157
##  ARIMA(0,0,3)(2,1,0)[12]                    : 4717.265
##  ARIMA(0,0,3)(2,1,0)[12] with drift         : 4715.17
##  ARIMA(0,0,4)(0,1,0)[12]                    : 4811.182
##  ARIMA(0,0,4)(0,1,0)[12] with drift         : 4812.386
##  ARIMA(0,0,4)(0,1,1)[12]                    : 4735.48
##  ARIMA(0,0,4)(0,1,1)[12] with drift         : 4732.338
##  ARIMA(0,0,4)(1,1,0)[12]                    : 4745.398
##  ARIMA(0,0,4)(1,1,0)[12] with drift         : 4745.02
##  ARIMA(0,0,5)(0,1,0)[12]                    : 4811.895
##  ARIMA(0,0,5)(0,1,0)[12] with drift         : 4813.285
##  ARIMA(1,0,0)(0,1,0)[12]                    : 4806.36
##  ARIMA(1,0,0)(0,1,0)[12] with drift         : 4807.539
##  ARIMA(1,0,0)(0,1,1)[12]                    : 4730.728
##  ARIMA(1,0,0)(0,1,1)[12] with drift         : 4727.128
##  ARIMA(1,0,0)(0,1,2)[12]                    : 4721.692
##  ARIMA(1,0,0)(0,1,2)[12] with drift         : 4719.392
##  ARIMA(1,0,0)(1,1,0)[12]                    : 4742.14
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 4741.788
##  ARIMA(1,0,0)(1,1,1)[12]                    : 4723.38
##  ARIMA(1,0,0)(1,1,1)[12] with drift         : 4720.359
##  ARIMA(1,0,0)(1,1,2)[12]                    : 4723.079
##  ARIMA(1,0,0)(1,1,2)[12] with drift         : 4720.739
##  ARIMA(1,0,0)(2,1,0)[12]                    : 4717.146
##  ARIMA(1,0,0)(2,1,0)[12] with drift         : 4714.365
##  ARIMA(1,0,0)(2,1,1)[12]                    : 4718.991
##  ARIMA(1,0,0)(2,1,1)[12] with drift         : 4716.076
##  ARIMA(1,0,0)(2,1,2)[12]                    : 4720.067
##  ARIMA(1,0,0)(2,1,2)[12] with drift         : 4717.575
##  ARIMA(1,0,1)(0,1,0)[12]                    : 4808.214
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 4809.474
##  ARIMA(1,0,1)(0,1,1)[12]                    : 4731.71
##  ARIMA(1,0,1)(0,1,1)[12] with drift         : 4728.886
##  ARIMA(1,0,1)(0,1,2)[12]                    : 4722.309
##  ARIMA(1,0,1)(0,1,2)[12] with drift         : 4720.995
##  ARIMA(1,0,1)(1,1,0)[12]                    : 4742.503
##  ARIMA(1,0,1)(1,1,0)[12] with drift         : 4741.643
##  ARIMA(1,0,1)(1,1,1)[12]                    : 4724.469
##  ARIMA(1,0,1)(1,1,1)[12] with drift         : 4722.282
##  ARIMA(1,0,1)(1,1,2)[12]                    : 4723.803
##  ARIMA(1,0,1)(1,1,2)[12] with drift         : 4722.49
##  ARIMA(1,0,1)(2,1,0)[12]                    : 4716.028
##  ARIMA(1,0,1)(2,1,0)[12] with drift         : 4714.931
##  ARIMA(1,0,1)(2,1,1)[12]                    : 4717.937
##  ARIMA(1,0,1)(2,1,1)[12] with drift         : 4716.713
##  ARIMA(1,0,2)(0,1,0)[12]                    : 4810.319
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : 4811.594
##  ARIMA(1,0,2)(0,1,1)[12]                    : 4733.675
##  ARIMA(1,0,2)(0,1,1)[12] with drift         : 4731.017
##  ARIMA(1,0,2)(0,1,2)[12]                    : 4724.076
##  ARIMA(1,0,2)(0,1,2)[12] with drift         : 4722.98
##  ARIMA(1,0,2)(1,1,0)[12]                    : 4744.251
##  ARIMA(1,0,2)(1,1,0)[12] with drift         : 4743.583
##  ARIMA(1,0,2)(1,1,1)[12]                    : Inf
##  ARIMA(1,0,2)(1,1,1)[12] with drift         : 4724.226
##  ARIMA(1,0,2)(2,1,0)[12]                    : 4718.172
##  ARIMA(1,0,2)(2,1,0)[12] with drift         : 4717.123
##  ARIMA(1,0,3)(0,1,0)[12]                    : 4805.722
##  ARIMA(1,0,3)(0,1,0)[12] with drift         : 4807.037
##  ARIMA(1,0,3)(0,1,1)[12]                    : 4735.274
##  ARIMA(1,0,3)(0,1,1)[12] with drift         : 4732.325
##  ARIMA(1,0,3)(1,1,0)[12]                    : 4745.336
##  ARIMA(1,0,3)(1,1,0)[12] with drift         : 4745.054
##  ARIMA(1,0,4)(0,1,0)[12]                    : 4807.694
##  ARIMA(1,0,4)(0,1,0)[12] with drift         : 4808.992
##  ARIMA(2,0,0)(0,1,0)[12]                    : 4808.215
##  ARIMA(2,0,0)(0,1,0)[12] with drift         : 4809.469
##  ARIMA(2,0,0)(0,1,1)[12]                    : 4732.151
##  ARIMA(2,0,0)(0,1,1)[12] with drift         : 4728.997
##  ARIMA(2,0,0)(0,1,2)[12]                    : 4723.473
##  ARIMA(2,0,0)(0,1,2)[12] with drift         : 4721.428
##  ARIMA(2,0,0)(1,1,0)[12]                    : 4744.076
##  ARIMA(2,0,0)(1,1,0)[12] with drift         : 4743.555
##  ARIMA(2,0,0)(1,1,1)[12]                    : 4725.244
##  ARIMA(2,0,0)(1,1,1)[12] with drift         : 4722.469
##  ARIMA(2,0,0)(1,1,2)[12]                    : 4725.004
##  ARIMA(2,0,0)(1,1,2)[12] with drift         : 4722.878
##  ARIMA(2,0,0)(2,1,0)[12]                    : 4718.351
##  ARIMA(2,0,0)(2,1,0)[12] with drift         : 4716.122
##  ARIMA(2,0,0)(2,1,1)[12]                    : 4720.153
##  ARIMA(2,0,0)(2,1,1)[12] with drift         : 4717.813
##  ARIMA(2,0,1)(0,1,0)[12]                    : 4810.324
##  ARIMA(2,0,1)(0,1,0)[12] with drift         : 4811.608
##  ARIMA(2,0,1)(0,1,1)[12]                    : 4733.734
##  ARIMA(2,0,1)(0,1,1)[12] with drift         : Inf
##  ARIMA(2,0,1)(0,1,2)[12]                    : 4724.21
##  ARIMA(2,0,1)(0,1,2)[12] with drift         : 4723.07
##  ARIMA(2,0,1)(1,1,0)[12]                    : 4744.014
##  ARIMA(2,0,1)(1,1,0)[12] with drift         : 4743.474
##  ARIMA(2,0,1)(1,1,1)[12]                    : Inf
##  ARIMA(2,0,1)(1,1,1)[12] with drift         : 4724.353
##  ARIMA(2,0,1)(2,1,0)[12]                    : Inf
##  ARIMA(2,0,1)(2,1,0)[12] with drift         : Inf
##  ARIMA(2,0,2)(0,1,0)[12]                    : 4812.445
##  ARIMA(2,0,2)(0,1,0)[12] with drift         : 4813.734
##  ARIMA(2,0,2)(0,1,1)[12]                    : 4735.316
##  ARIMA(2,0,2)(0,1,1)[12] with drift         : 4732.794
##  ARIMA(2,0,2)(1,1,0)[12]                    : 4745.392
##  ARIMA(2,0,2)(1,1,0)[12] with drift         : 4745.313
##  ARIMA(2,0,3)(0,1,0)[12]                    : Inf
##  ARIMA(2,0,3)(0,1,0)[12] with drift         : Inf
##  ARIMA(3,0,0)(0,1,0)[12]                    : 4810.311
##  ARIMA(3,0,0)(0,1,0)[12] with drift         : 4811.602
##  ARIMA(3,0,0)(0,1,1)[12]                    : 4733.178
##  ARIMA(3,0,0)(0,1,1)[12] with drift         : 4730.636
##  ARIMA(3,0,0)(0,1,2)[12]                    : 4722.035
##  ARIMA(3,0,0)(0,1,2)[12] with drift         : 4720.983
##  ARIMA(3,0,0)(1,1,0)[12]                    : 4744.378
##  ARIMA(3,0,0)(1,1,0)[12] with drift         : 4744.311
##  ARIMA(3,0,0)(1,1,1)[12]                    : 4724.455
##  ARIMA(3,0,0)(1,1,1)[12] with drift         : 4722.635
##  ARIMA(3,0,0)(2,1,0)[12]                    : 4716.387
##  ARIMA(3,0,0)(2,1,0)[12] with drift         : 4715.039
##  ARIMA(3,0,1)(0,1,0)[12]                    : 4806.336
##  ARIMA(3,0,1)(0,1,0)[12] with drift         : 4807.692
##  ARIMA(3,0,1)(0,1,1)[12]                    : 4734.927
##  ARIMA(3,0,1)(0,1,1)[12] with drift         : 4732.331
##  ARIMA(3,0,1)(1,1,0)[12]                    : 4745.016
##  ARIMA(3,0,1)(1,1,0)[12] with drift         : 4744.867
##  ARIMA(3,0,2)(0,1,0)[12]                    : 4805.233
##  ARIMA(3,0,2)(0,1,0)[12] with drift         : 4806.608
##  ARIMA(4,0,0)(0,1,0)[12]                    : 4812.086
##  ARIMA(4,0,0)(0,1,0)[12] with drift         : 4813.34
##  ARIMA(4,0,0)(0,1,1)[12]                    : 4735.063
##  ARIMA(4,0,0)(0,1,1)[12] with drift         : 4732.063
##  ARIMA(4,0,0)(1,1,0)[12]                    : 4745.768
##  ARIMA(4,0,0)(1,1,0)[12] with drift         : 4745.387
##  ARIMA(4,0,1)(0,1,0)[12]                    : 4807.956
##  ARIMA(4,0,1)(0,1,0)[12] with drift         : 4809.256
##  ARIMA(5,0,0)(0,1,0)[12]                    : 4812.633
##  ARIMA(5,0,0)(0,1,0)[12] with drift         : 4814.024
## 
## 
## 
##  Best model: ARIMA(1,0,0)(2,1,0)[12] with drift
summary(auto_arima_model)
## Series: train_ts 
## ARIMA(1,0,0)(2,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1     sar2     drift
##       0.1553  -0.9758  -0.5567  6238.908
## s.e.  0.0821   0.0811   0.0889  2719.252
## 
## sigma^2 = 6.805e+11:  log likelihood = -2351.98
## AIC=4713.97   AICc=4714.37   BIC=4729.21
## 
## Training set error measures:
##                    ME     RMSE    MAE       MPE     MAPE      MASE         ACF1
## Training set -10516.9 784666.2 477478 -5.311666 11.76748 0.6738487 -0.005244743
best_arima_model <- Arima(kwh_ts, order = c(1,0,0), seasonal = c(2,1,0), include.drift = TRUE)

forecast_arima_2014 <- forecast(best_arima_model, h = 12)
plot(forecast_arima_2014, main = "ARIMA Forecasted KWH for 2014", ylab = "KWH", xlab = "Date") +
  theme_minimal()

## NULL
forecast_df <- tibble(
  DATE = seq(as.Date("2014-01-01"), by = "month", length.out = 12),
  KWH = as.numeric(forecast_arima_2014$mean)
) %>%
  mutate(DATE = format(DATE, "%Y-%b"))  

forecasts2014 <- bind_rows(
  kwh_df %>% 
    mutate(DATE = format(DATE, "%Y-%b")) %>%  
    select(DATE, KWH),                       
  forecast_df %>% select(DATE, KWH)          
)

write_xlsx(forecasts2014, "2014forecasts.xlsx")
ets_model <- ets(kwh_ts)
summary(ets_model)
## ETS(M,N,M) 
## 
## Call:
## ets(y = kwh_ts)
## 
##   Smoothing parameters:
##     alpha = 0.1137 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 6134772.7867 
##     s = 0.9547 0.7533 0.8603 1.1912 1.2556 1.1992
##            0.9982 0.7684 0.8161 0.9175 1.0618 1.2237
## 
##   sigma:  0.1199
## 
##      AIC     AICc      BIC 
## 6224.057 6226.784 6272.919 
## 
## Training set error measures:
##                    ME   RMSE      MAE      MPE     MAPE      MASE      ACF1
## Training set 61009.73 835107 503972.9 -4.39013 12.04006 0.7233624 0.1698584
ets_forecast <- forecast(ets_model, h = 12)
print(ets_forecast)
##          Point Forecast   Lo 80    Hi 80   Lo 95    Hi 95
## Jan 2014        9304198 7874822 10733573 7118157 11490239
## Feb 2014        8073653 6825214  9322093 6164330  9982976
## Mar 2014        6975902 5890247  8061557 5315536  8636269
## Apr 2014        6204893 5233075  7176711 4718626  7691160
## May 2014        5842200 4921428  6762972 4434001  7250399
## Jun 2014        7589986 6386314  8793659 5749128  9430845
## Jul 2014        9117199 7662450 10571948 6892353 11342046
## Aug 2014        9546786 8014248 11079324 7202971 11890601
## Sep 2014        9056676 7594096 10519256 6819853 11293500
## Oct 2014        6541029 5478445  7603613 4915947  8166111
## Nov 2014        5727865 4791927  6663803 4296472  7159259
## Dec 2014        7259239 6066203  8452274 5434648  9083829
plot(ets_forecast, main = "ETS Forecast for 2014", ylab = "KWH") +
  theme_minimal()

## NULL
checkresiduals(ets_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,M)
## Q* = 28.615, df = 24, p-value = 0.2349
## 
## Model df: 0.   Total lags used: 24
fourier_terms <- fourier(kwh_ts, K = 6)  

dhr_model <- auto.arima(kwh_ts, xreg = fourier_terms, seasonal = FALSE)
summary(dhr_model)
## Series: kwh_ts 
## Regression with ARIMA(0,1,2) errors 
## 
## Coefficients:
##           ma1      ma2     drift       S1-12      C1-12       S2-12       C2-12
##       -0.7172  -0.2306  8179.924  -433072.76  -55348.97  1421278.79  -202990.31
## s.e.   0.0703   0.0696  3509.582    97663.09   97312.69    91271.54    91133.93
##           S3-12      C3-12      S4-12     C4-12     S5-12       C5-12
##       404716.88  114035.52  174082.17  90253.95  32082.59  -213588.96
## s.e.   82283.63   82295.58   72334.38  72425.88  64035.07    64247.18
##           C6-12
##       -6954.648
## s.e.  43033.076
## 
## sigma^2 = 6.916e+11:  log likelihood = -2868.36
## AIC=5766.72   AICc=5769.46   BIC=5815.51
## 
## Training set error measures:
##                    ME   RMSE      MAE      MPE     MAPE     MASE       ACF1
## Training set 3541.437 798462 489037.3 -4.98658 11.54046 0.701925 0.01038267
fourier_future <- fourier(kwh_ts, K = 6, h = 12)
dhr_forecast <- forecast(dhr_model, xreg = fourier_future, h = 12)
print(dhr_forecast)
##          Point Forecast   Lo 80    Hi 80   Lo 95    Hi 95
## Jan 2014        9647050 8581302 10712797 8017130 11276970
## Feb 2014        7975471 6867927  9083016 6281628  9669315
## Mar 2014        6999967 5891025  8108908 5303987  8695946
## Apr 2014        6328632 5218296  7438969 4630519  8026745
## May 2014        6067726 4955996  7179456 4367482  7767970
## Jun 2014        7565226 6452105  8678347 5862854  9267598
## Jul 2014        8438888 7324378  9553399 6734391 10143386
## Aug 2014        9327233 8211334 10443132 7620613 11033853
## Sep 2014        8660460 7543175  9777745 6951720 10369200
## Oct 2014        6686376 5567706  7805045 4975518  8397233
## Nov 2014        5982442 4862389  7102494 4269469  7695414
## Dec 2014        7304501 6183067  8425934 5589416  9019585
plot(dhr_forecast, main = "Dynamic Harmonic Regression Forecast for 2014", ylab = "KWH") +
  theme_minimal()

## NULL
checkresiduals(dhr_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,1,2) errors
## Q* = 13.406, df = 22, p-value = 0.9212
## 
## Model df: 2.   Total lags used: 24

Part C – BONUS, optional (part or all), Waterflow_Pipe1.xlsx and Waterflow_Pipe2.xlsx

Part C consists of two data sets. These are simple 2 columns sets, however they have different time stamps. Your optional assignment is to time-base sequence the data and aggregate based on hour (example of what this looks like, follows). Note for multiple recordings within an hour, take the mean. Then to determine if the data is stationary and can it be forecast. If so, provide a week forward forecast and present results via Rpubs and .rmd and the forecast in an Excel readable file.