Comparing ETS and ARIMA Methods to Forecast Residential Energy Use
This notebook conducts an Exploratory Data Analysis (EDA) of energy data then performs a forecast using ETS (Error Trend Seasonality) and ARIMA (Autoregressive Integrated Moving Average) to forecast residential energy use.
Import Libraries
library(dplyr)
library(readr)
library(tidyverse)
library(timeSeries)
library(forecast)
library(timetk)
library(sweep)
library(tidyquant)
Import Data
ca_electricity <- read_csv("~/Data_Science/Energy/ElectricityByCounty.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## County = col_character(),
## Sector = col_character()
## )
## See spec(...) for full column specifications.
Inspect Data, Filter Residential Features, and Reshape Data from Wide to Long
ca_electricity <- ca_electricity %>%
dplyr::filter(Sector == "Residential") %>%
select(-Sector, -`Total Usage`)
ca_electricity_long <- ca_electricity %>%
gather("year", "energy", 2:29)
Format year to Date
ca_electricity_long$year <- as.Date(ca_electricity_long$year, "%Y")
ca_electricity_long %>%
group_by(County) %>%
summarize(meanEnergy = mean(energy)) %>%
arrange(desc(meanEnergy))
## # A tibble: 58 x 2
## County meanEnergy
## <chr> <dbl>
## 1 LOS ANGELES 18770.
## 2 ORANGE 6216.
## 3 SAN DIEGO 5724.
## 4 RIVERSIDE 5419.
## 5 SAN BERNARDINO 4313.
## 6 SACRAMENTO 4288.
## 7 SANTA CLARA 3689.
## 8 ALAMEDA 2842.
## 9 CONTRA COSTA 2535.
## 10 FRESNO 2303.
## # ... with 48 more rows
Los Angeles County consumes the most energy, 3x more than the second largest consumer, Riverside County.
ca_electricity_long %>%
group_by(County) %>%
dplyr::filter(year > "2014-01-01") %>%
summarize(meanEnergyLst3Yr = mean(energy)) %>%
arrange(desc(meanEnergyLst3Yr))
## # A tibble: 58 x 2
## County meanEnergyLst3Yr
## <chr> <dbl>
## 1 LOS ANGELES 20256.
## 2 RIVERSIDE 7156.
## 3 SAN DIEGO 6842.
## 4 ORANGE 6827.
## 5 SAN BERNARDINO 4982.
## 6 SACRAMENTO 4878.
## 7 SANTA CLARA 3893.
## 8 ALAMEDA 2947.
## 9 CONTRA COSTA 2804.
## 10 FRESNO 2726.
## # ... with 48 more rows
Use time series plots to visualize enrgy use overtime
ca_electricity_long %>% group_by(County) %>%
ggplot(aes(year, energy)) +
geom_line(aes(col = County)) +
theme(legend.text = element_text(size = 2), legend.key.size = unit(.2, "cm"))
Let’s just look at Southern California. We can see that Riverside County’s energy use is increasing while Los Angeles County’s is decreasing.
ca_electricity_long %>% dplyr::filter(County == "LOS ANGELES" | County == "RIVERSIDE" | County == "SAN DIEGO" | County == "SAN BERNARDINO" | County == "ORANGE" ) %>%
group_by(County) %>%
ggplot(aes(year, energy)) +
geom_line(aes(col = County)) +
theme(legend.text = element_text(size = 5), legend.key.size = unit(.5, "cm"))
First we must group counties to create separate time series model for each county.
energy_county_nest <- ca_electricity_long %>%
group_by(County) %>%
nest(.key = "data.tbl")
energy_county_nest
## # A tibble: 58 x 2
## County data.tbl
## <chr> <list>
## 1 ALAMEDA <tibble [28 × 2]>
## 2 ALPINE <tibble [28 × 2]>
## 3 AMADOR <tibble [28 × 2]>
## 4 BUTTE <tibble [28 × 2]>
## 5 CALAVERAS <tibble [28 × 2]>
## 6 COLUSA <tibble [28 × 2]>
## 7 CONTRA COSTA <tibble [28 × 2]>
## 8 DEL NORTE <tibble [28 × 2]>
## 9 EL DORADO <tibble [28 × 2]>
## 10 FRESNO <tibble [28 × 2]>
## # ... with 48 more rows
Coerce to Time Series object.
energy_county_ts <- energy_county_nest %>%
mutate(data.ts = map(.x = data.tbl,
.f = tk_ts,
start = 2000,
freq = 12))
energy_county_ts
## # A tibble: 58 x 3
## County data.tbl data.ts
## <chr> <list> <list>
## 1 ALAMEDA <tibble [28 × 2]> <S3: ts>
## 2 ALPINE <tibble [28 × 2]> <S3: ts>
## 3 AMADOR <tibble [28 × 2]> <S3: ts>
## 4 BUTTE <tibble [28 × 2]> <S3: ts>
## 5 CALAVERAS <tibble [28 × 2]> <S3: ts>
## 6 COLUSA <tibble [28 × 2]> <S3: ts>
## 7 CONTRA COSTA <tibble [28 × 2]> <S3: ts>
## 8 DEL NORTE <tibble [28 × 2]> <S3: ts>
## 9 EL DORADO <tibble [28 × 2]> <S3: ts>
## 10 FRESNO <tibble [28 × 2]> <S3: ts>
## # ... with 48 more rows
energy_county_fit <- energy_county_ts %>%
mutate(fit.ets = map(data.ts, ets))
energy_county_fit
## # A tibble: 58 x 4
## County data.tbl data.ts fit.ets
## <chr> <list> <list> <list>
## 1 ALAMEDA <tibble [28 × 2]> <S3: ts> <S3: ets>
## 2 ALPINE <tibble [28 × 2]> <S3: ts> <S3: ets>
## 3 AMADOR <tibble [28 × 2]> <S3: ts> <S3: ets>
## 4 BUTTE <tibble [28 × 2]> <S3: ts> <S3: ets>
## 5 CALAVERAS <tibble [28 × 2]> <S3: ts> <S3: ets>
## 6 COLUSA <tibble [28 × 2]> <S3: ts> <S3: ets>
## 7 CONTRA COSTA <tibble [28 × 2]> <S3: ts> <S3: ets>
## 8 DEL NORTE <tibble [28 × 2]> <S3: ts> <S3: ets>
## 9 EL DORADO <tibble [28 × 2]> <S3: ts> <S3: ets>
## 10 FRESNO <tibble [28 × 2]> <S3: ts> <S3: ets>
## # ... with 48 more rows
energy_county_fit %>%
mutate(tidy = map(fit.ets, sw_tidy)) %>%
unnest(tidy, .drop = TRUE) %>%
spread(key = County, value = estimate)
## # A tibble: 5 x 59
## term ALAMEDA ALPINE AMADOR BUTTE CALAVERAS COLUSA `CONTRA COSTA`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 alpha 0.676 0.205 0.994 1.00e-4 10.00e-1 1.000 0.646
## 2 b NA 0.0746 NA 1.17e+1 3.15e+0 NA 34.0
## 3 beta NA 0.205 NA 1.00e-4 1.00e-4 NA 0.000100
## 4 l 2503. 6.09 105. 5.25e+2 1.35e+2 50.6 2016.
## 5 phi NA NA NA 9.77e-1 NA NA NA
## # ... with 51 more variables: `DEL NORTE` <dbl>, `EL DORADO` <dbl>,
## # FRESNO <dbl>, GLENN <dbl>, HUMBOLDT <dbl>, IMPERIAL <dbl>, INYO <dbl>,
## # KERN <dbl>, KINGS <dbl>, LAKE <dbl>, LASSEN <dbl>, `LOS
## # ANGELES` <dbl>, MADERA <dbl>, MARIN <dbl>, MARIPOSA <dbl>,
## # MENDOCINO <dbl>, MERCED <dbl>, MODOC <dbl>, MONO <dbl>,
## # MONTEREY <dbl>, NAPA <dbl>, NEVADA <dbl>, ORANGE <dbl>, PLACER <dbl>,
## # PLUMAS <dbl>, RIVERSIDE <dbl>, SACRAMENTO <dbl>, `SAN BENITO` <dbl>,
## # `SAN BERNARDINO` <dbl>, `SAN DIEGO` <dbl>, `SAN FRANCISCO` <dbl>, `SAN
## # JOAQUIN` <dbl>, `SAN LUIS OBISPO` <dbl>, `SAN MATEO` <dbl>, `SANTA
## # BARBARA` <dbl>, `SANTA CLARA` <dbl>, `SANTA CRUZ` <dbl>, SHASTA <dbl>,
## # SIERRA <dbl>, SISKIYOU <dbl>, SOLANO <dbl>, SONOMA <dbl>,
## # STANISLAUS <dbl>, SUTTER <dbl>, TEHAMA <dbl>, TRINITY <dbl>,
## # TULARE <dbl>, TUOLUMNE <dbl>, VENTURA <dbl>, YOLO <dbl>, YUBA <dbl>
energy_county_fit %>%
mutate(glance = map(fit.ets, sw_glance)) %>%
unnest(glance, .drop = TRUE)
## # A tibble: 58 x 13
## County model.desc sigma logLik AIC BIC ME RMSE MAE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALAMEDA ETS(A,N,N) 1.01e+2 -175. 356. 360. 27.4 97.5 72.2
## 2 ALPINE ETS(M,A,N) 1.45e-1 -49.3 109. 115. -0.140 1.42 0.886
## 3 AMADOR ETS(M,N,N) 3.45e-2 -86.7 179. 183. 1.52 4.20 3.12
## 4 BUTTE ETS(M,Ad,N) 3.78e-2 -134. 280. 288. -0.778 22.5 17.2
## 5 CALAVERAS ETS(M,A,N) 3.17e-2 -92.7 195. 202. -0.0978 5.54 3.95
## 6 COLUSA ETS(M,N,N) 3.25e-2 -63.2 132. 136. 0.702 1.85 1.31
## 7 CONTRA CO… ETS(M,A,N) 3.23e-2 -168. 345. 352. -2.99 77.7 56.7
## 8 DEL NORTE ETS(A,N,N) 9.59e+0 -109. 224. 228. 1.37 9.25 6.47
## 9 EL DORADO ETS(M,A,N) 2.83e-2 -127. 264. 271. -1.73 18.1 14.4
## 10 FRESNO ETS(M,A,N) 2.92e-2 -162. 334. 341. -0.492 63.1 49.8
## # ... with 48 more rows, and 4 more variables: MPE <dbl>, MAPE <dbl>,
## # MASE <dbl>, ACF1 <dbl>
augment_fit_ets <- energy_county_fit %>%
mutate(augment = map(fit.ets, sw_augment, timetk_idx = TRUE, rename_index = "date")) %>%
unnest(augment, .drop = TRUE)
augment_fit_ets
## # A tibble: 1,624 x 5
## County date .actual .fitted .resid
## <chr> <date> <dbl> <dbl> <dbl>
## 1 ALAMEDA 1990-01-22 2498. 2503. -4.47
## 2 ALAMEDA 1991-01-22 2515. 2500. 15.5
## 3 ALAMEDA 1992-01-22 2465. 2510. -45.2
## 4 ALAMEDA 1993-01-22 2529. 2480. 49.6
## 5 ALAMEDA 1994-01-22 2775. 2513. 261.
## 6 ALAMEDA 1995-01-22 2548. 2690. -141.
## 7 ALAMEDA 1996-01-22 2615. 2594. 21.0
## 8 ALAMEDA 1997-01-22 2675. 2608. 66.3
## 9 ALAMEDA 1998-01-22 2791. 2653. 138.
## 10 ALAMEDA 1999-01-22 2891. 2747. 144.
## # ... with 1,614 more rows
It’s much easier to visualize with a small number of counties. For purpose of this analysis we’ll focus on Southern California.
augment_fit_ets_resid <- augment_fit_ets %>%
dplyr::filter(County == "LOS ANGELES" | County == "RIVERSIDE" | County == "SAN DIEGO" | County == "SAN BERNARDINO" | County == "ORANGE" )
augment_fit_ets_resid %>%
ggplot(aes(x = date, y = .resid, group = County)) +
geom_hline(yintercept = 0, color = "grey40") +
geom_line(color = palette_light()[[2]]) +
geom_smooth(method = "loess") +
labs(title = "Energy Use by County",
subtitle = "ETS Model Residuals", x = "") +
theme_tq() +
facet_wrap(~ County, scale = "free_y", ncol = 3) +
scale_x_date(date_labels = "%Y")
From the above residuls, we can see the model is most accurate in Riverside County, and least accurate in Los Angeles.
augment_fit_ets_resid %>% group_by(County) %>%
summarize(countyResid = mean(.resid)) %>%
arrange(countyResid)
## # A tibble: 5 x 2
## County countyResid
## <chr> <dbl>
## 1 RIVERSIDE 0.00223
## 2 SAN BERNARDINO 49.5
## 3 SAN DIEGO 59.0
## 4 ORANGE 77.2
## 5 LOS ANGELES 114.
energy_county_fcast <- energy_county_fit %>%
mutate(fcast.ets = map(fit.ets, forecast, h = 3))
energy_county_fcast
## # A tibble: 58 x 5
## County data.tbl data.ts fit.ets fcast.ets
## <chr> <list> <list> <list> <list>
## 1 ALAMEDA <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 2 ALPINE <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 3 AMADOR <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 4 BUTTE <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 5 CALAVERAS <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 6 COLUSA <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 7 CONTRA COSTA <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 8 DEL NORTE <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 9 EL DORADO <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## 10 FRESNO <tibble [28 × 2]> <S3: ts> <S3: ets> <S3: forecast>
## # ... with 48 more rows
energy_county_fcast_tidy <- energy_county_fcast %>%
mutate(sweep = map(fcast.ets, sw_sweep, fitted = FALSE, timetk_idx = TRUE)) %>%
unnest(sweep)
energy_county_fcast_tidy
## # A tibble: 1,798 x 8
## County index key energy lo.80 lo.95 hi.80 hi.95
## <chr> <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALAMEDA 1990-01-22 actual 2498. NA NA NA NA
## 2 ALAMEDA 1991-01-22 actual 2515. NA NA NA NA
## 3 ALAMEDA 1992-01-22 actual 2465. NA NA NA NA
## 4 ALAMEDA 1993-01-22 actual 2529. NA NA NA NA
## 5 ALAMEDA 1994-01-22 actual 2775. NA NA NA NA
## 6 ALAMEDA 1995-01-22 actual 2548. NA NA NA NA
## 7 ALAMEDA 1996-01-22 actual 2615. NA NA NA NA
## 8 ALAMEDA 1997-01-22 actual 2675. NA NA NA NA
## 9 ALAMEDA 1998-01-22 actual 2791. NA NA NA NA
## 10 ALAMEDA 1999-01-22 actual 2891. NA NA NA NA
## # ... with 1,788 more rows
energy_county_fcast_tidy <- energy_county_fcast_tidy %>%
dplyr::filter(County == "LOS ANGELES" | County == "RIVERSIDE" | County == "SAN DIEGO" | County == "SAN BERNARDINO" | County == "ORANGE")
energy_county_fcast_tidy %>%
ggplot(aes(x = index, y = energy, color = key, group = County)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_line() +
labs(title = "Energy Use by County",
subtitle = "ETS Model Forecasts",
x = "", y = "Units") +
scale_x_date(date_breaks = "2 year", date_labels = "%Y") +
scale_color_tq() +
scale_fill_tq() +
facet_wrap(~ County, scales = "free_y", ncol = 3) +
theme_tq() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
energy_county_fit <- energy_county_ts %>%
mutate(fit.arima = map(data.ts, Arima))
energy_county_fit
## # A tibble: 58 x 4
## County data.tbl data.ts fit.arima
## <chr> <list> <list> <list>
## 1 ALAMEDA <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 2 ALPINE <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 3 AMADOR <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 4 BUTTE <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 5 CALAVERAS <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 6 COLUSA <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 7 CONTRA COSTA <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 8 DEL NORTE <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 9 EL DORADO <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## 10 FRESNO <tibble [28 × 2]> <S3: ts> <S3: ARIMA>
## # ... with 48 more rows
energy_county_fit %>%
mutate(tidy = map(fit.arima, sw_tidy)) %>%
unnest(tidy, .drop = TRUE) %>%
spread(key = County, value = estimate)
## # A tibble: 1 x 59
## term ALAMEDA ALPINE AMADOR BUTTE CALAVERAS COLUSA `CONTRA COSTA`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 intercept 2842. 8.38 128. 660. 178. 58.8 2535.
## # ... with 51 more variables: `DEL NORTE` <dbl>, `EL DORADO` <dbl>,
## # FRESNO <dbl>, GLENN <dbl>, HUMBOLDT <dbl>, IMPERIAL <dbl>, INYO <dbl>,
## # KERN <dbl>, KINGS <dbl>, LAKE <dbl>, LASSEN <dbl>, `LOS
## # ANGELES` <dbl>, MADERA <dbl>, MARIN <dbl>, MARIPOSA <dbl>,
## # MENDOCINO <dbl>, MERCED <dbl>, MODOC <dbl>, MONO <dbl>,
## # MONTEREY <dbl>, NAPA <dbl>, NEVADA <dbl>, ORANGE <dbl>, PLACER <dbl>,
## # PLUMAS <dbl>, RIVERSIDE <dbl>, SACRAMENTO <dbl>, `SAN BENITO` <dbl>,
## # `SAN BERNARDINO` <dbl>, `SAN DIEGO` <dbl>, `SAN FRANCISCO` <dbl>, `SAN
## # JOAQUIN` <dbl>, `SAN LUIS OBISPO` <dbl>, `SAN MATEO` <dbl>, `SANTA
## # BARBARA` <dbl>, `SANTA CLARA` <dbl>, `SANTA CRUZ` <dbl>, SHASTA <dbl>,
## # SIERRA <dbl>, SISKIYOU <dbl>, SOLANO <dbl>, SONOMA <dbl>,
## # STANISLAUS <dbl>, SUTTER <dbl>, TEHAMA <dbl>, TRINITY <dbl>,
## # TULARE <dbl>, TUOLUMNE <dbl>, VENTURA <dbl>, YOLO <dbl>, YUBA <dbl>
energy_county_fit %>%
mutate(glance = map(fit.arima, sw_glance)) %>%
unnest(glance, .drop = TRUE)
## # A tibble: 58 x 13
## County model.desc sigma logLik AIC BIC ME RMSE MAE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALAMEDA ARIMA(0,0,0)… 195. -187. 378. 380. -4.06e-13 192. 164.
## 2 ALPINE ARIMA(0,0,0)… 2.39 -63.6 131. 134. -2.02e-13 2.35 2.02
## 3 AMADOR ARIMA(0,0,0)… 13.5 -112. 228. 231. 9.64e-15 13.2 11.6
## 4 BUTTE ARIMA(0,0,0)… 73.6 -160. 323. 326. 2.27e-13 72.3 65.7
## 5 CALAVE… ARIMA(0,0,0)… 23.7 -128. 260. 262. 1.62e-14 23.2 20.5
## 6 COLUSA ARIMA(0,0,0)… 6.38 -91.1 186. 189. 6.09e-15 6.27 5.81
## 7 CONTRA… ARIMA(0,0,0)… 268. -196. 396. 398. 9.42e-13 263. 231.
## 8 DEL NO… ARIMA(0,0,0)… 11.5 -108. 219. 222. 2.28e-14 11.3 9.56
## 9 EL DOR… ARIMA(0,0,0)… 90.8 -165. 335. 338. 4.47e-13 89.1 80.0
## 10 FRESNO ARIMA(0,0,0)… 334. -202. 408. 411. 2.03e-13 328. 294.
## # ... with 48 more rows, and 4 more variables: MPE <dbl>, MAPE <dbl>,
## # MASE <dbl>, ACF1 <dbl>
augment_fit_arima <- energy_county_fit %>%
mutate(augment = map(fit.arima, sw_augment, timetk_idx = TRUE, rename_index = "date")) %>%
unnest(augment, .drop = TRUE)
augment_fit_arima
## # A tibble: 1,624 x 5
## County date .actual .fitted .resid
## <chr> <date> <dbl> <dbl> <dbl>
## 1 ALAMEDA 1990-01-22 2498. 2842. -344.
## 2 ALAMEDA 1991-01-22 2515. 2842. -327.
## 3 ALAMEDA 1992-01-22 2465. 2842. -377.
## 4 ALAMEDA 1993-01-22 2529. 2842. -313.
## 5 ALAMEDA 1994-01-22 2775. 2842. -67.4
## 6 ALAMEDA 1995-01-22 2548. 2842. -293.
## 7 ALAMEDA 1996-01-22 2615. 2842. -227.
## 8 ALAMEDA 1997-01-22 2675. 2842. -167.
## 9 ALAMEDA 1998-01-22 2791. 2842. -50.6
## 10 ALAMEDA 1999-01-22 2891. 2842. 48.8
## # ... with 1,614 more rows
It’s much easier to visualize with a small number of counties. For purpose of this analysis we’ll focus on Southern California.
augment_fit_arima_resid <- augment_fit_arima %>%
dplyr::filter(County == "LOS ANGELES" | County == "RIVERSIDE" | County == "SAN DIEGO" | County == "SAN BERNARDINO" | County == "ORANGE" )
augment_fit_arima_resid %>%
ggplot(aes(x = date, y = .resid, group = County)) +
geom_hline(yintercept = 0, color = "grey40") +
geom_line(color = palette_light()[[2]]) +
geom_smooth(method = "loess") +
labs(title = "Energy Use by County",
subtitle = "ARIMA Model Residuals", x = "") +
theme_tq() +
facet_wrap(~ County, scale = "free_y", ncol = 3) +
scale_x_date(date_labels = "%Y")
From the above residuls, we can see the model is most accurate in San Bernardino County, and least accurate in Orange County.
augment_fit_arima_resid %>% group_by(County) %>%
summarize(countyResid = mean(.resid)) %>%
arrange(countyResid)
## # A tibble: 5 x 2
## County countyResid
## <chr> <dbl>
## 1 SAN BERNARDINO 4.71e-13
## 2 RIVERSIDE 5.36e-13
## 3 LOS ANGELES 1.49e-12
## 4 SAN DIEGO 1.61e-12
## 5 ORANGE 2.50e-12
energy_county_fcast <- energy_county_fit %>%
mutate(fcast.arima = map(fit.arima, forecast, h = 3))
energy_county_fcast
## # A tibble: 58 x 5
## County data.tbl data.ts fit.arima fcast.arima
## <chr> <list> <list> <list> <list>
## 1 ALAMEDA <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 2 ALPINE <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 3 AMADOR <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 4 BUTTE <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 5 CALAVERAS <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 6 COLUSA <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 7 CONTRA COSTA <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 8 DEL NORTE <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 9 EL DORADO <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## 10 FRESNO <tibble [28 × 2]> <S3: ts> <S3: ARIMA> <S3: forecast>
## # ... with 48 more rows
energy_county_fcast_tidy <- energy_county_fcast %>%
mutate(sweep = map(fcast.arima, sw_sweep, fitted = FALSE, timetk_idx = TRUE)) %>%
unnest(sweep)
energy_county_fcast_tidy
## # A tibble: 1,798 x 8
## County index key energy lo.80 lo.95 hi.80 hi.95
## <chr> <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ALAMEDA 1990-01-22 actual 2498. NA NA NA NA
## 2 ALAMEDA 1991-01-22 actual 2515. NA NA NA NA
## 3 ALAMEDA 1992-01-22 actual 2465. NA NA NA NA
## 4 ALAMEDA 1993-01-22 actual 2529. NA NA NA NA
## 5 ALAMEDA 1994-01-22 actual 2775. NA NA NA NA
## 6 ALAMEDA 1995-01-22 actual 2548. NA NA NA NA
## 7 ALAMEDA 1996-01-22 actual 2615. NA NA NA NA
## 8 ALAMEDA 1997-01-22 actual 2675. NA NA NA NA
## 9 ALAMEDA 1998-01-22 actual 2791. NA NA NA NA
## 10 ALAMEDA 1999-01-22 actual 2891. NA NA NA NA
## # ... with 1,788 more rows
energy_county_fcast_tidy <- energy_county_fcast_tidy %>%
dplyr::filter(County == "LOS ANGELES" | County == "RIVERSIDE" | County == "SAN DIEGO" | County == "SAN BERNARDINO" | County == "ORANGE")
energy_county_fcast_tidy %>%
ggplot(aes(x = index, y = energy, color = key, group = County)) +
geom_ribbon(aes(ymin = lo.95, ymax = hi.95),
fill = "#D5DBFF", color = NA, size = 0) +
geom_ribbon(aes(ymin = lo.80, ymax = hi.80, fill = key),
fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_line() +
labs(title = "Energy Use by County",
subtitle = "ETS Model Forecasts",
x = "", y = "Units") +
scale_x_date(date_breaks = "2 year", date_labels = "%Y") +
scale_color_tq() +
scale_fill_tq() +
facet_wrap(~ County, scales = "free_y", ncol = 3) +
theme_tq() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))