pacman::p_load(tidyverse,DataExplorer,fpp3,tsibble,xts,imputeTS,cowplot)
power_data <- readxl::read_excel("Data/ResidentialCustomerForecastLoad-624.xlsx")
power_ts <- power_data %>% mutate(`YYYY-MMM` = yearmonth(`YYYY-MMM`)) %>% select(!CaseSequence) %>% tsibble(index=`YYYY-MMM`)
autoplot(power_ts)
plot_histogram(power_ts)
power_ts %>% arrange(KWH)
# A tsibble: 192 x 2 [1M]
`YYYY-MMM` KWH
<mth> <dbl>
1 2010 Jul 770523
2 2005 Nov 4313019
3 1999 Dec 4419229
4 1999 May 4426794
5 1999 Nov 4436269
6 2005 May 4453983
7 2006 Nov 4458429
8 2001 Nov 4461979
9 2008 Nov 4555602
10 2008 May 4608528
# ... with 182 more rows
power_ts <- power_ts %>%
mutate(KWH=ifelse(KWH>4000000,KWH,NA))
ggplot_na_imputations(power_ts,na_kalman (power_ts))
ggplot_na_imputations(power_ts,na_interpolation(power_ts))
power_ts <- power_ts %>%
mutate( KWH =na_kalman(KWH))
plot_grid(autoplot(power_ts, box_cox(KWH, features(power_ts,KWH, features = guerrero)$lambda_guerrero))+labs(y= "Lambda KWH"
,title = "Lambda Transform")
,autoplot(power_ts),ncol=1 )
auto_ETS <- power_ts %>%
model(
ETS(KWH)
)
auto_ETS %>%
report()
Series: KWH
Model: ETS(M,N,M)
Smoothing parameters:
alpha = 0.1152884
gamma = 0.1983171
Initial states:
l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
6188218 0.9083241 0.7515655 0.9314405 1.225896 1.276768 1.233079 1.016933
s[-7] s[-8] s[-9] s[-10] s[-11]
0.7623912 0.804406 0.8855059 1.011111 1.19258
sigma^2: 0.0094
AIC AICc BIC
6145.320 6148.047 6194.182
compare_ETS <- power_ts %>%
model(
`Multiplicative Trend` = ETS(KWH ~ error("M") + trend("A") +
season("M")),
`Damped Multiplicative` = ETS(KWH ~ error("M") +
trend("Ad") + season("M"))
)
bind_rows(glance(auto_ETS),report(compare_ETS)) %>% arrange(AICc)
# A tibble: 3 x 9
.model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ETS(KWH) 0.00943 -3058. 6145. 6148. 6194. 3.98e11 4.20e11 0.0738
2 Multiplicative Trend 0.00929 -3056. 6146. 6149. 6201. 3.89e11 4.01e11 0.0705
3 Damped Multiplicative 0.00937 -3056. 6147. 6151. 6206. 3.86e11 4.05e11 0.0703
bind_rows(accuracy(auto_ETS),accuracy(compare_ETS)) %>% arrange(RMSE)
# A tibble: 3 x 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Damped Multiplicat~ Trai~ 49856. 6.22e5 4.61e5 -0.0664 6.94 0.738 0.753 0.277
2 Multiplicative Tre~ Trai~ 26430. 6.24e5 4.64e5 -0.376 7.00 0.744 0.756 0.313
3 ETS(KWH) Trai~ 52868. 6.31e5 4.82e5 0.0715 7.27 0.772 0.765 0.220
bind_cols(compare_ETS,auto_ETS) %>%
forecast(h=10) %>% autoplot(power_ts,level=NULL)
gg_tsdisplay(power_ts,plot_type='partial')
power_ts %>%
features(KWH, unitroot_nsdiffs)
# A tibble: 1 x 1
nsdiffs
<int>
1 1
power_ts %>%
mutate(dif =difference(KWH,12)) %>%
features(dif, unitroot_ndiffs)
# A tibble: 1 x 1
ndiffs
<int>
1 0
| The first step begins with generating a baseline model.
auto_ARIMA <- power_ts %>%
model(
ARIMA(KWH)
)
auto_ARIMA %>% report()
Series: KWH
Model: ARIMA(0,0,4)(2,1,0)[12] w/ drift
Coefficients:
ma1 ma2 ma3 ma4 sar1 sar2 constant
0.3459 0.0665 0.2109 -0.0772 -0.7368 -0.4342 233796.06
s.e. 0.0772 0.0838 0.0737 0.0819 0.0763 0.0784 73852.84
sigma^2 estimated as 3.866e+11: log likelihood=-2657.65
AIC=5331.31 AICc=5332.15 BIC=5356.85
compare_ARIMA <- power_ts %>%
model(
arima_000_110 = ARIMA(KWH ~ pdq(0,0,0) + PDQ(1,1,0)),
arima_001_110 = ARIMA(KWH ~ pdq(0,0,1) + PDQ(1,1,0)),
arima_001_111 = ARIMA(KWH ~ pdq(0,0,1) + PDQ(1,1,1)),
arima_000_210 = ARIMA(KWH ~ pdq(0,0,0) + PDQ(2,1,0)),
arima_100_012 = ARIMA(KWH ~ pdq(1,0,0) + PDQ(0,1,2)),
arima_001_012 = ARIMA(KWH ~ pdq(0,0,1) + PDQ(1,1,2)),
arima_002_410 = ARIMA(KWH ~ pdq(0,0,2) + PDQ(4,1,0)),
arima_4002_210 = ARIMA(KWH ~ pdq(4,0,0) + PDQ(2,1,0))
)
bind_rows(glance(compare_ARIMA),glance(auto_ARIMA)) %>% arrange(AIC)
# A tibble: 9 x 8
.model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 ARIMA(KWH) 386602799008. -2658. 5331. 5332. 5357. <cpl [24]> <cpl [4]>
2 arima_4002_210 389634639624. -2658. 5333. 5333. 5358. <cpl [28]> <cpl [0]>
3 arima_100_012 404924310601. -2663. 5336. 5337. 5352. <cpl [1]> <cpl [24]>
4 arima_001_111 406833491598. -2663. 5337. 5337. 5353. <cpl [12]> <cpl [13]>
5 arima_001_012 408948678419. -2663. 5339. 5339. 5358. <cpl [12]> <cpl [25]>
6 arima_002_410 408675724669. -2662. 5340. 5341. 5366. <cpl [48]> <cpl [2]>
7 arima_000_210 440120515524. -2671. 5349. 5349. 5362. <cpl [24]> <cpl [0]>
8 arima_001_110 469469551957. -2674. 5357. 5357. 5370. <cpl [12]> <cpl [1]>
9 arima_000_110 501282435339. -2681. 5368. 5368. 5377. <cpl [12]> <cpl [0]>
bind_rows(accuracy(compare_ARIMA),accuracy(auto_ARIMA)) %>% arrange(RMSE)
# A tibble: 9 x 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA(KWH) Traini~ -8679. 5.90e5 4.30e5 -0.798 6.53 0.690 0.715 1.27e-4
2 arima_4002_210 Traini~ -8451. 5.93e5 4.30e5 -0.817 6.53 0.690 0.718 1.93e-2
3 arima_002_410 Traini~ -10256. 6.07e5 4.37e5 -0.874 6.63 0.700 0.735 -7.41e-4
4 arima_100_012 Traini~ -13655. 6.09e5 4.42e5 -0.933 6.71 0.709 0.738 -3.74e-3
5 arima_001_012 Traini~ -13655. 6.11e5 4.43e5 -0.939 6.74 0.711 0.740 -6.95e-4
6 arima_001_111 Traini~ -16533. 6.11e5 4.44e5 -0.985 6.76 0.712 0.740 -1.28e-3
7 arima_000_210 Traini~ -12501. 6.37e5 4.58e5 -1.01 6.89 0.734 0.772 2.78e-1
8 arima_001_110 Traini~ -7866. 6.58e5 4.78e5 -0.791 7.21 0.766 0.797 -1.37e-2
9 arima_000_110 Traini~ -10304. 6.82e5 4.96e5 -0.915 7.41 0.795 0.826 2.49e-1
compare_models <- power_ts %>%
slice(-n()) %>%
stretch_tsibble(.init = 10) %>%
model(
`Damped Multiplicative` = ETS(KWH ~ error("M") +
trend("Ad") + season("M")),
ARIMA = ARIMA(KWH ~ pdq(0,0,4) + PDQ(2,1,0))
)
compare_models %>%
forecast(h = 12) %>%
accuracy(power_ts) %>%
select(.model, RMSE:MAPE)
ggdraw() + draw_image("https://i.gyazo.com/fd8b1f4c75166cbf12829b852fc903ca.png")
final_model <- power_ts %>%
model(
ARIMA(KWH ~ pdq(0,0,4) + PDQ(2,1,0))
)
final_model %>% forecast(h=12) %>%
autoplot(power_ts)
final_model %>% forecast(h=12)
# A fable: 12 x 4 [1M]
# Key: .model [1]
.model `YYYY-MMM` KWH .mean
<chr> <mth> <dist> <dbl>
1 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Jan N(1e+07, 3.9e+11) 1.02e7
2 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Feb N(8720444, 4.3e+11) 8.72e6
3 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Mar N(7132127, 4.3e+11) 7.13e6
4 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Apr N(5807917, 4.5e+11) 5.81e6
5 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 May N(5938295, 4.5e+11) 5.94e6
6 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Jun N(8207200, 4.5e+11) 8.21e6
7 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Jul N(9520351, 4.5e+11) 9.52e6
8 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Aug N(1e+07, 4.5e+11) 1.00e7
9 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Sep N(8501665, 4.5e+11) 8.50e6
10 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Oct N(5870434, 4.5e+11) 5.87e6
11 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Nov N(6158797, 4.5e+11) 6.16e6
12 ARIMA(KWH ~ pdq(0, 0, 4) + PDQ(2, 1, 0~ 2014 Dec N(8354323, 4.5e+11) 8.35e6