library(tidyverse)
library(fpp3)
atm_data <- readxl::read_xlsx("ATM624Data.xlsx") %>%
mutate(DATE = as_date(DATE, origin="1900-01-01")) %>%
as_tsibble(index=DATE, key=c("ATM"))
atm_data <- atm_data[complete.cases(atm_data), ]
atm_data %>% autoplot(Cash)
ggplot(data=atm_data, mapping=aes(DATE, Cash, color=ATM)) + geom_line() +
facet_wrap(~ATM, ncol=1, scales="free")
Consider each ATM separately.
atm1_data <- atm_data %>% filter(ATM=="ATM1")
atm2_data <- atm_data %>% filter(ATM=="ATM2")
atm3_data <- atm_data %>% filter(ATM=="ATM3")
atm4_data <- atm_data %>% filter(ATM=="ATM4")
atm1_data<- atm1_data %>%
filter(ATM == "ATM1") %>%
mutate(t = row_number()) %>%
update_tsibble(index = t, regular = TRUE)
atm1_data %>% model(STL(Cash)) %>%
components() %>%
autoplot()
For ATM1, there is no seasonal component. The trend appears to increase
then decrease and remain nearly monotonous the rest of the time.
atm1_data %>%
features(Cash, unitroot_ndiffs)
## # A tibble: 1 × 2
## ATM ndiffs
## <chr> <int>
## 1 ATM1 0
atm1_data %>%
gg_tsdisplay(log(Cash), plot_type='partial')
fit <- atm1_data %>%
model(
additive = ETS(log(Cash) ~ error("A") + trend("A") +
season("N")),
multiplicative = ETS(log(Cash) ~ error("M") + trend("A") +
season("N")),
Damped = ETS(log(Cash) ~ error("A") + trend("Ad") +
season("N"))
)
fc1 <- fit %>% forecast(h = 100)
fc1 %>%
autoplot(atm1_data, level=NULL)
atm2_data <- atm2_data %>%
filter(ATM == "ATM2") %>%
mutate(t = row_number()) %>%
update_tsibble(index = t, regular = TRUE)
atm2_data %>% model(STL(Cash)) %>%
components() %>%
autoplot()
atm2_data %>%
gg_tsdisplay(log(Cash), plot_type='partial')
## Warning: Removed 25 rows containing missing values (`geom_segment()`).
## Removed 25 rows containing missing values (`geom_segment()`).
fit <- atm2_data %>%
model(ARIMA(Cash))
report(fit)
## Series: Cash
## Model: ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## -0.0791 -0.4303 -0.1951 -0.1130 -0.4829 -0.9342
## s.e. 0.0488 0.0480 0.0543 0.0478 0.0480 0.0298
##
## sigma^2 estimated as 992.2: log likelihood=-1762.11
## AIC=3538.22 AICc=3538.54 BIC=3565.46
fc2 <- fit %>% forecast(h = 100)
fc2 %>%
autoplot(atm2_data, level=NULL)
atm3_data <- atm3_data %>%
filter(ATM == "ATM3") %>%
mutate(t = row_number()) %>%
update_tsibble(index = t, regular = TRUE)
atm3_data %>% model(STL(Cash)) %>%
components() %>%
autoplot()
atm3_data %>%
gg_tsdisplay(Cash, plot_type='partial')
fit <- atm3_data %>%
model(NAIVE(Cash))
report(fit)
## Series: Cash
## Model: NAIVE
##
## sigma^2: 25.8985
fc3 <- fit %>% forecast(h = 100)
fc3 %>%
autoplot(atm3_data, level=NULL)
***
atm4_data <- atm4_data %>%
filter(ATM == "ATM4") %>%
mutate(t = row_number()) %>%
update_tsibble(index = t, regular = TRUE)
atm4_data %>% model(STL(Cash)) %>%
components() %>%
autoplot()
fit <- atm4_data %>%
model(ARIMA(Cash))
report(fit)
## Series: Cash
## Model: ARIMA(0,0,0) w/ mean
##
## Coefficients:
## constant
## 474.0433
## s.e. 34.0248
##
## sigma^2 estimated as 423718: log likelihood=-2882.03
## AIC=5768.06 AICc=5768.1 BIC=5775.86
fc4 <- fit %>% forecast(h = 100)
fc4 %>%
autoplot(atm4_data, level=NULL)
fc <- rbind(fc1, fc2, fc3, fc4)
## Warning: `rbind.fbl_ts()` was deprecated in fabletools 0.2.0.
## ℹ Please use `bind_rows()` instead.
## ℹ The deprecated feature was likely used in the base package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# write.csv(fc %>% as_tibble() %>% select(-c(Cash, `.model`)), "atm_forecasts.csv")
res_data <- readxl::read_xlsx("ResidentialCustomerForecastLoad-624.xlsx") %>%
mutate(date = yearmonth(`YYYY-MMM`)) %>%
select(date, KWH) %>%
as_tsibble(index=date) %>%
mutate(t = row_number()) %>%
update_tsibble(index = t, regular = TRUE)
res_data %>% autoplot(KWH)
Box-cox transformation
lambda <- res_data %>%
features(KWH, features = guerrero) %>%
pull(lambda_guerrero)
res_data <- res_data[complete.cases(res_data), ]
res_data <- res_data %>%
fill_gaps() %>%
tidyr::replace_na(list(t=129, KWH=mean(res_data$KWH)))
res_data %>%
model(STL(KWH)) %>%
components() %>%
autoplot()
res_data %>%
gg_tsdisplay(KWH, plot_type='partial')
lambda <- res_data %>%
features(KWH, features = guerrero) %>%
pull(lambda_guerrero)
fit <- res_data %>%
model(ARIMA(box_cox(KWH, lambda)))
report(fit)
## Series: KWH
## Model: ARIMA(0,1,5)
## Transformation: box_cox(KWH, lambda)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5
## -0.4675 -0.5411 -0.3184 -0.0152 0.4155
## s.e. 0.0658 0.0853 0.0744 0.0831 0.0724
##
## sigma^2 estimated as 3.492e+22: log likelihood=-5227.39
## AIC=10466.78 AICc=10467.23 BIC=10486.29
fc <- fit %>% forecast(h = 100)
fc %>%
autoplot(res_data, level=NULL)
fc_result <- fc %>%
as_tibble() %>%
select(c(t, .mean)) %>%
mutate(Month=t, KWH=.mean, .keep="none")
fc_result
## # A tibble: 100 × 2
## Month KWH
## <dbl> <dbl>
## 1 193 9346784.
## 2 194 8051474.
## 3 195 6668606.
## 4 196 6409709.
## 5 197 7581581.
## 6 198 7581325.
## 7 199 7581069.
## 8 200 7580813.
## 9 201 7580556.
## 10 202 7580300.
## # ℹ 90 more rows
#write.csv(fc_result, "kwh_forecast.csv")