library(tidyverse) #data science
library(tidymodels) #training algorithms
library(lubridate) #transform to date class
library(modeltime) #forecasting
library(timetk) #bike_sharing_daily, plotting functions, splitting data
tsdata <- bike_sharing_daily
glimpse(tsdata)
## Rows: 731
## Columns: 16
## $ instant <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ dteday <date> 2011-01-01, 2011-01-02, 2011-01-03, 2011-01-04, 2011-01-05…
## $ season <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ yr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ mnth <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ holiday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ weekday <dbl> 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,…
## $ workingday <dbl> 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ weathersit <dbl> 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2,…
## $ temp <dbl> 0.3441670, 0.3634780, 0.1963640, 0.2000000, 0.2269570, 0.20…
## $ atemp <dbl> 0.3636250, 0.3537390, 0.1894050, 0.2121220, 0.2292700, 0.23…
## $ hum <dbl> 0.805833, 0.696087, 0.437273, 0.590435, 0.436957, 0.518261,…
## $ windspeed <dbl> 0.1604460, 0.2485390, 0.2483090, 0.1602960, 0.1869000, 0.08…
## $ casual <dbl> 331, 131, 120, 108, 82, 88, 148, 68, 54, 41, 43, 25, 38, 54…
## $ registered <dbl> 654, 670, 1229, 1454, 1518, 1518, 1362, 891, 768, 1280, 122…
## $ cnt <dbl> 985, 801, 1349, 1562, 1600, 1606, 1510, 959, 822, 1321, 126…
sum(is.na(tsdata))
## [1] 0
sum(duplicated(tsdata))
## [1] 0
tsdata <-
tsdata %>%
select(dteday, cnt)
tsdata %>%
plot_time_series(dteday, cnt)
# Create Train and Test Sets
ts_split <- initial_time_split(tsdata,
prop = 0.70) #default is 0.75
ts_train <- training(ts_split)
ts_test <- testing(ts_split)
# OPTIONAL
ts_split %>% tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(dteday, cnt)
arima_model <-
arima_reg() %>%
set_engine("auto_arima") %>%
fit(cnt ~ dteday, data = ts_train)
## frequency = 7 observations per 1 week
prophet_model <-
prophet_reg(
seasonality_yearly = TRUE,
seasonality_weekly = TRUE,
seasonality_daily = TRUE
) %>%
set_engine("prophet") %>%
fit(cnt ~ dteday, data = ts_train)
tslm_model <-
linear_reg() %>%
set_engine("lm") %>%
fit(cnt ~ as.numeric(dteday) + factor(month(dteday, label = TRUE)),
data = ts_train)
forecast_table <- modeltime_table(
arima_model,
prophet_model,
tslm_model
)
# Calibrate and Metrics
forecast_table %>%
modeltime_calibrate(ts_test) %>%
modeltime_accuracy()
## # A tibble: 3 × 9
## .model_id .model_desc .type mae mape mase smape rmse rsq
## <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ARIMA(1,1,3) WITH DRIFT Test 1622. 218. 1.95 26.9 2432. 0.262
## 2 2 PROPHET Test 971. 153. 1.17 18.8 1357. 0.379
## 3 3 LM Test 980. 162. 1.18 19.0 1396. 0.338
# Calibrate, Forecast and Plot
forecast_table %>%
modeltime_calibrate(ts_test) %>%
modeltime_forecast(actual_data = ts_test) %>%
plot_modeltime_forecast()
## Using '.calibration_data' to forecast.
# Calibrate, Forecast on new data, and Plot
forecast_table %>%
modeltime_calibrate(ts_test) %>%
modeltime_forecast(new_data = ts_test,
actual_data = tsdata) %>%
plot_modeltime_forecast()
# Final fit to entire dataset
future_forecast <- forecast_table %>%
modeltime_refit(tsdata) %>%
modeltime_calibrate(tsdata) %>%
modeltime_forecast(
h = "3 months",
actual_data = tsdata
)
## frequency = 7 observations per 1 week
future_forecast %>%
plot_modeltime_forecast()