temperature <- read_csv("data_input/temperature.csv")
temperature <- temperature %>%
drop_na() %>%
select(c(datetime, `San Francisco`))
temperature_ts <- ts(temperature$`San Francisco`, frequency = 24)
temperature_ts %>% tail(24 * 7) %>%
autoplot()
## Time Series Decomposition
ts_dc <- decompose(temperature_ts)
# decompose example for last month
temperature_ts %>%
tail(24 * 7 * 4) %>%
decompose() %>%
autoplot()
data_dc <- temperature %>%
mutate(
trend = ts_dc$trend,
seasonal = ts_dc$seasonal,
random = ts_dc$random
)
data_dc_seas <- data_dc %>%
mutate(hour = hour(datetime)) %>%
distinct(hour, seasonal)
data_dc_seas
## # A tibble: 264 x 2
## hour seasonal
## <int> <dbl>
## 1 13 1.82
## 2 14 2.56
## 3 15 2.98
## 4 16 3.46
## 5 17 3.49
## 6 18 3.15
## 7 19 2.68
## 8 20 1.68
## 9 21 0.743
## 10 22 0.00712
## # ... with 254 more rows
ggplot(data_dc_seas, aes(x = factor(hour), y = seasonal)) +
geom_col()
temperature %>%
group_by(hour = hour(datetime)) %>%
summarise(`San Francisco` = mean(`San Francisco`)) %>%
ggplot(aes(hour, `San Francisco`)) +
geom_col()
data_msts <- msts(temperature_ts, seasonal.periods = c(24, 24 * 7))
ts_mstl <- mstl(data_msts)
data_msts %>%
tail(24 * 7 * 4) %>%
mstl() %>%
autoplot()
data_mstl <- as.data.frame(ts_mstl) %>%
mutate(Datetime = temperature$datetime) %>%
select(Datetime, everything())
data_mstl_seas <- data_mstl %>%
mutate(
wday = wday(Datetime, label = TRUE, abbr = FALSE),
hour = hour(Datetime)
) %>%
group_by(wday, hour) %>%
summarise(seasonal = mean(Seasonal24 + Seasonal168)) %>%
ungroup()
data_mstl_seas
## # A tibble: 168 x 3
## wday hour seasonal
## <ord> <int> <dbl>
## 1 Sunday 0 4.25
## 2 Sunday 1 3.46
## 3 Sunday 2 2.16
## 4 Sunday 3 0.870
## 5 Sunday 4 -0.282
## 6 Sunday 5 -1.16
## 7 Sunday 6 -1.64
## 8 Sunday 7 -2.23
## 9 Sunday 8 -2.63
## 10 Sunday 9 -2.84
## # ... with 158 more rows
ggplot(data_mstl_seas, aes(x = factor(hour), y = seasonal)) +
geom_col(aes(fill = wday))
# split train-test
data_test <- tail(temperature_ts, 24 * 7)
data_train <- head(temperature_ts, length(temperature_ts) - 24 * 7)
# subset to more recent period
data_train <- tail(data_train, 24 * 7 * 4 * 3)
autoplot(data_train)
autoplot(data_test)
ets_fc <- data_train %>%
log() %>%
ets() %>%
forecast(h = 24 * 7)
autoplot(ets_fc)
stlm_fc <- data_train %>%
msts(seasonal.periods = c(24, 24 * 7)) %>%
log() %>%
stlm() %>%
forecast(h = 24 * 7)
autoplot(stlm_fc)
test_df <- data.frame(
datetime = tail(temperature$datetime, 24 * 7),
test = as.numeric(data_test),
ets = as.numeric(ets_fc$mean) %>% exp() %>% round(),
stlm = as.numeric(stlm_fc$mean %>% exp() %>% round())
)
test_df %>%
gather(model, forecast, -test, -datetime) %>%
group_by(model) %>%
rmse(test, forecast)
## # A tibble: 2 x 4
## model .metric .estimator .estimate
## <chr> <chr> <chr> <dbl>
## 1 ets rmse standard 9.41
## 2 stlm rmse standard 8.56
error_df <- test_df %>%
select(datetime, test, stlm) %>%
mutate(
wday = wday(datetime, label = TRUE, abbr = FALSE),
hour = hour(datetime)
) %>%
group_by(wday, hour) %>%
rmse(test, stlm) %>%
ungroup() %>%
arrange(desc(.estimate))
error_df
## # A tibble: 168 x 5
## wday hour .metric .estimator .estimate
## <ord> <int> <chr> <chr> <dbl>
## 1 Friday 1 rmse standard 14.6
## 2 Thursday 1 rmse standard 14.4
## 3 Tuesday 23 rmse standard 14.2
## 4 Thursday 2 rmse standard 14.2
## 5 Wednesday 0 rmse standard 14.1
## 6 Friday 0 rmse standard 14.1
## 7 Tuesday 22 rmse standard 13.9
## 8 Thursday 0 rmse standard 13.8
## 9 Wednesday 1 rmse standard 13.8
## 10 Tuesday 21 rmse standard 13.6
## # ... with 158 more rows
ggplot(error_df, aes(factor(hour), .estimate)) +
geom_col(aes(fill = wday))