Part A – ATM Forecast, ATM624Data.xlsx

library(tidyverse)
library(fpp3)
atm_data <- readxl::read_xlsx("ATM624Data.xlsx") %>% 
  mutate(DATE = as_date(DATE, origin="1900-01-01")) %>% 
  as_tsibble(index=DATE, key=c("ATM"))

atm_data <- atm_data[complete.cases(atm_data), ]
atm_data %>% autoplot(Cash)

ggplot(data=atm_data, mapping=aes(DATE, Cash, color=ATM)) + geom_line() +
  facet_wrap(~ATM, ncol=1, scales="free")

Consider each ATM separately.

atm1_data <- atm_data %>% filter(ATM=="ATM1")
atm2_data <- atm_data %>% filter(ATM=="ATM2")
atm3_data <- atm_data %>% filter(ATM=="ATM3")
atm4_data <- atm_data %>% filter(ATM=="ATM4")
atm1_data<- atm1_data %>%
    filter(ATM == "ATM1") %>% 
    mutate(t = row_number()) %>% 
    update_tsibble(index = t, regular = TRUE) 

atm1_data %>% model(STL(Cash)) %>% 
    components() %>% 
    autoplot()

For ATM1, there is no seasonal component. The trend appears to increase then decrease and remain nearly monotonous the rest of the time.

atm1_data %>% 
  features(Cash, unitroot_ndiffs)
## # A tibble: 1 × 2
##   ATM   ndiffs
##   <chr>  <int>
## 1 ATM1       0
atm1_data %>% 
  gg_tsdisplay(log(Cash), plot_type='partial')

fit <- atm1_data %>% 
  model(
    additive = ETS(log(Cash) ~ error("A") + trend("A") +
                                                season("N")),
    multiplicative = ETS(log(Cash) ~ error("M") + trend("A") +
                                                season("N")),
    Damped = ETS(log(Cash) ~ error("A") + trend("Ad") +
               season("N"))
  )
fc1 <- fit %>%  forecast(h = 100)
fc1 %>% 
  autoplot(atm1_data, level=NULL)


atm2_data <- atm2_data %>%
    filter(ATM == "ATM2") %>% 
    mutate(t = row_number()) %>% 
    update_tsibble(index = t, regular = TRUE) 
atm2_data %>% model(STL(Cash)) %>% 
    components() %>% 
    autoplot()

atm2_data %>% 
  gg_tsdisplay(log(Cash), plot_type='partial')
## Warning: Removed 25 rows containing missing values (`geom_segment()`).
## Removed 25 rows containing missing values (`geom_segment()`).

fit <- atm2_data %>%
  model(ARIMA(Cash))
report(fit)
## Series: Cash 
## Model: ARIMA(5,1,1) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5      ma1
##       -0.0791  -0.4303  -0.1951  -0.1130  -0.4829  -0.9342
## s.e.   0.0488   0.0480   0.0543   0.0478   0.0480   0.0298
## 
## sigma^2 estimated as 992.2:  log likelihood=-1762.11
## AIC=3538.22   AICc=3538.54   BIC=3565.46
fc2 <- fit %>%  forecast(h = 100)
fc2 %>% 
  autoplot(atm2_data, level=NULL)


atm3_data <- atm3_data %>%
    filter(ATM == "ATM3") %>% 
    mutate(t = row_number()) %>% 
    update_tsibble(index = t, regular = TRUE) 

atm3_data %>% model(STL(Cash)) %>% 
    components() %>% 
    autoplot()

atm3_data %>% 
  gg_tsdisplay(Cash, plot_type='partial')

fit <- atm3_data %>%
  model(NAIVE(Cash))
report(fit)
## Series: Cash 
## Model: NAIVE 
## 
## sigma^2: 25.8985
fc3 <- fit %>%  forecast(h = 100)
fc3 %>% 
  autoplot(atm3_data, level=NULL)

***

atm4_data <- atm4_data %>%
    filter(ATM == "ATM4") %>% 
    mutate(t = row_number()) %>% 
    update_tsibble(index = t, regular = TRUE) 

atm4_data %>% model(STL(Cash)) %>% 
    components() %>% 
    autoplot()

fit <- atm4_data %>%
  model(ARIMA(Cash))
report(fit)
## Series: Cash 
## Model: ARIMA(0,0,0) w/ mean 
## 
## Coefficients:
##       constant
##       474.0433
## s.e.   34.0248
## 
## sigma^2 estimated as 423718:  log likelihood=-2882.03
## AIC=5768.06   AICc=5768.1   BIC=5775.86
fc4 <- fit %>%  forecast(h = 100)
fc4 %>% 
  autoplot(atm4_data, level=NULL)

fc <- rbind(fc1, fc2, fc3, fc4)
## Warning: `rbind.fbl_ts()` was deprecated in fabletools 0.2.0.
## ℹ Please use `bind_rows()` instead.
## ℹ The deprecated feature was likely used in the base package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# write.csv(fc %>% as_tibble() %>% select(-c(Cash, `.model`)), "atm_forecasts.csv")

Part B – Forecasting Power, ResidentialCustomerForecastLoad-624.xlsx

res_data <- readxl::read_xlsx("ResidentialCustomerForecastLoad-624.xlsx") %>% 
  mutate(date = yearmonth(`YYYY-MMM`)) %>%
  select(date, KWH) %>%
  as_tsibble(index=date) %>% 
  mutate(t = row_number()) %>% 
  update_tsibble(index = t, regular = TRUE) 
res_data %>% autoplot(KWH)

Box-cox transformation

lambda <- res_data %>%
  features(KWH, features = guerrero) %>%
  pull(lambda_guerrero)
res_data <- res_data[complete.cases(res_data), ]
res_data <- res_data %>% 
            fill_gaps() %>% 
            tidyr::replace_na(list(t=129, KWH=mean(res_data$KWH)))

res_data %>% 
  model(STL(KWH)) %>%
    components() %>%
    autoplot()

res_data %>% 
  gg_tsdisplay(KWH, plot_type='partial')

lambda <- res_data %>%
  features(KWH, features = guerrero) %>%
  pull(lambda_guerrero)

fit <- res_data %>%
  model(ARIMA(box_cox(KWH, lambda)))
    
report(fit)
## Series: KWH 
## Model: ARIMA(0,1,5) 
## Transformation: box_cox(KWH, lambda) 
## 
## Coefficients:
##           ma1      ma2      ma3      ma4     ma5
##       -0.4675  -0.5411  -0.3184  -0.0152  0.4155
## s.e.   0.0658   0.0853   0.0744   0.0831  0.0724
## 
## sigma^2 estimated as 3.492e+22:  log likelihood=-5227.39
## AIC=10466.78   AICc=10467.23   BIC=10486.29
fc <- fit %>%  forecast(h = 100)
fc %>% 
  autoplot(res_data, level=NULL)

fc_result <- fc %>% 
  as_tibble() %>% 
  select(c(t, .mean)) %>% 
  mutate(Month=t, KWH=.mean, .keep="none")
  
fc_result
## # A tibble: 100 × 2
##    Month      KWH
##    <dbl>    <dbl>
##  1   193 9346784.
##  2   194 8051474.
##  3   195 6668606.
##  4   196 6409709.
##  5   197 7581581.
##  6   198 7581325.
##  7   199 7581069.
##  8   200 7580813.
##  9   201 7580556.
## 10   202 7580300.
## # ℹ 90 more rows
#write.csv(fc_result, "kwh_forecast.csv")