Read Data

temperature <- read_csv("data_input/temperature.csv") 

temperature <- temperature %>% 
  drop_na() %>% 
  select(c(datetime, `San Francisco`))
temperature_ts <- ts(temperature$`San Francisco`, frequency = 24)
temperature_ts %>% tail(24 * 7) %>% 
  autoplot()

## Time Series Decomposition

Simple Seasonality

ts_dc <- decompose(temperature_ts)

# decompose example for last month
temperature_ts %>% 
  tail(24 * 7 * 4) %>% 
  decompose() %>% 
  autoplot()

data_dc <- temperature %>%
  mutate(
    trend = ts_dc$trend,
    seasonal = ts_dc$seasonal,
    random = ts_dc$random
  )
data_dc_seas <- data_dc %>% 
  mutate(hour = hour(datetime)) %>% 
  distinct(hour, seasonal)

data_dc_seas
## # A tibble: 264 x 2
##     hour seasonal
##    <int>    <dbl>
##  1    13  1.82   
##  2    14  2.56   
##  3    15  2.98   
##  4    16  3.46   
##  5    17  3.49   
##  6    18  3.15   
##  7    19  2.68   
##  8    20  1.68   
##  9    21  0.743  
## 10    22  0.00712
## # ... with 254 more rows
ggplot(data_dc_seas, aes(x = factor(hour), y = seasonal)) +
  geom_col()

temperature %>% 
  group_by(hour = hour(datetime)) %>% 
  summarise(`San Francisco` = mean(`San Francisco`)) %>% 
  ggplot(aes(hour, `San Francisco`)) +
    geom_col()

Complex Seasonality

data_msts <- msts(temperature_ts, seasonal.periods = c(24, 24 * 7))
ts_mstl <- mstl(data_msts)

data_msts %>% 
  tail(24 * 7 * 4) %>% 
  mstl() %>% 
  autoplot()

data_mstl <- as.data.frame(ts_mstl) %>% 
  mutate(Datetime = temperature$datetime) %>% 
  select(Datetime, everything())
data_mstl_seas <- data_mstl %>% 
  mutate(
    wday = wday(Datetime, label = TRUE, abbr = FALSE),
    hour = hour(Datetime)
  ) %>% 
  group_by(wday, hour) %>% 
  summarise(seasonal = mean(Seasonal24 + Seasonal168)) %>% 
  ungroup()

data_mstl_seas
## # A tibble: 168 x 3
##    wday    hour seasonal
##    <ord>  <int>    <dbl>
##  1 Sunday     0    4.25 
##  2 Sunday     1    3.46 
##  3 Sunday     2    2.16 
##  4 Sunday     3    0.870
##  5 Sunday     4   -0.282
##  6 Sunday     5   -1.16 
##  7 Sunday     6   -1.64 
##  8 Sunday     7   -2.23 
##  9 Sunday     8   -2.63 
## 10 Sunday     9   -2.84 
## # ... with 158 more rows
ggplot(data_mstl_seas, aes(x = factor(hour), y = seasonal)) +
  geom_col(aes(fill = wday))

Forecast

Split Train-Test

# split train-test
data_test <- tail(temperature_ts, 24 * 7)
data_train <- head(temperature_ts, length(temperature_ts) - 24 * 7)

# subset to more recent period
data_train <- tail(data_train, 24 * 7 * 4 * 3)
autoplot(data_train)

autoplot(data_test)

Exponential Smoothing Model

ets_fc <- data_train %>% 
  log() %>% 
  ets() %>% 
  forecast(h = 24 * 7)

autoplot(ets_fc)

Complex Seasonality Model

stlm_fc <- data_train %>% 
  msts(seasonal.periods = c(24, 24 * 7)) %>% 
  log() %>% 
  stlm() %>% 
  forecast(h = 24 * 7)

autoplot(stlm_fc)

Model Evaluation

test_df <- data.frame(
  datetime = tail(temperature$datetime, 24 * 7),
  test = as.numeric(data_test),
  ets = as.numeric(ets_fc$mean) %>% exp() %>% round(),
  stlm = as.numeric(stlm_fc$mean %>% exp() %>% round())
)
test_df %>% 
  gather(model, forecast, -test, -datetime) %>% 
  group_by(model) %>% 
  rmse(test, forecast)
## # A tibble: 2 x 4
##   model .metric .estimator .estimate
##   <chr> <chr>   <chr>          <dbl>
## 1 ets   rmse    standard        9.41
## 2 stlm  rmse    standard        8.56

Analyzing Forecast Error

error_df <- test_df %>% 
  select(datetime, test, stlm) %>% 
  mutate(
    wday = wday(datetime, label = TRUE, abbr = FALSE),
    hour = hour(datetime)
  ) %>% 
  group_by(wday, hour) %>% 
  rmse(test, stlm) %>% 
  ungroup() %>% 
  arrange(desc(.estimate))

error_df
## # A tibble: 168 x 5
##    wday       hour .metric .estimator .estimate
##    <ord>     <int> <chr>   <chr>          <dbl>
##  1 Friday        1 rmse    standard        14.6
##  2 Thursday      1 rmse    standard        14.4
##  3 Tuesday      23 rmse    standard        14.2
##  4 Thursday      2 rmse    standard        14.2
##  5 Wednesday     0 rmse    standard        14.1
##  6 Friday        0 rmse    standard        14.1
##  7 Tuesday      22 rmse    standard        13.9
##  8 Thursday      0 rmse    standard        13.8
##  9 Wednesday     1 rmse    standard        13.8
## 10 Tuesday      21 rmse    standard        13.6
## # ... with 158 more rows
ggplot(error_df, aes(factor(hour), .estimate)) +
  geom_col(aes(fill = wday))