Loading the Required Libraries for Modeling and Forecasting

library(tidyverse) #data science
library(tidymodels) #training algorithms
library(lubridate) #transform to date class
library(modeltime) #forecasting
library(timetk) #bike_sharing_daily, plotting functions, splitting data

Reading the Data

tsdata <- bike_sharing_daily

glimpse(tsdata)
## Rows: 731
## Columns: 16
## $ instant    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
## $ dteday     <date> 2011-01-01, 2011-01-02, 2011-01-03, 2011-01-04, 2011-01-05…
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ yr         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ mnth       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ weekday    <dbl> 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,…
## $ workingday <dbl> 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ weathersit <dbl> 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2,…
## $ temp       <dbl> 0.3441670, 0.3634780, 0.1963640, 0.2000000, 0.2269570, 0.20…
## $ atemp      <dbl> 0.3636250, 0.3537390, 0.1894050, 0.2121220, 0.2292700, 0.23…
## $ hum        <dbl> 0.805833, 0.696087, 0.437273, 0.590435, 0.436957, 0.518261,…
## $ windspeed  <dbl> 0.1604460, 0.2485390, 0.2483090, 0.1602960, 0.1869000, 0.08…
## $ casual     <dbl> 331, 131, 120, 108, 82, 88, 148, 68, 54, 41, 43, 25, 38, 54…
## $ registered <dbl> 654, 670, 1229, 1454, 1518, 1518, 1362, 891, 768, 1280, 122…
## $ cnt        <dbl> 985, 801, 1349, 1562, 1600, 1606, 1510, 959, 822, 1321, 126…

Data Wrangling and EXploratory Data Analysis

sum(is.na(tsdata))
## [1] 0
sum(duplicated(tsdata))
## [1] 0
tsdata <- 
        tsdata %>%
        select(dteday, cnt)

tsdata %>%
        plot_time_series(dteday, cnt)

plitting Data into Training and Testing

# Create Train and Test Sets
ts_split <- initial_time_split(tsdata,
                               prop = 0.70) #default is 0.75
ts_train <- training(ts_split)
ts_test <- testing(ts_split)

# OPTIONAL
ts_split %>% tk_time_series_cv_plan() %>%
        plot_time_series_cv_plan(dteday, cnt)

Using ARIMA regression Forecasting Algorithm to Train Model

arima_model <- 
        arima_reg() %>% 
        set_engine("auto_arima") %>% 
        fit(cnt ~ dteday, data = ts_train)
## frequency = 7 observations per 1 week

Using Prophet Forecasting Algorithm to Train Model

prophet_model <- 
        prophet_reg(
                seasonality_yearly = TRUE,
                seasonality_weekly = TRUE,
                seasonality_daily = TRUE
        ) %>%
        set_engine("prophet") %>%
        fit(cnt ~ dteday, data = ts_train)

Using Machine Learning Algorithm TSLM to Train Model

tslm_model <-
        linear_reg() %>% 
        set_engine("lm") %>% 
        fit(cnt ~ as.numeric(dteday) + factor(month(dteday, label = TRUE)),
            data = ts_train)

Forecasting

forecast_table <- modeltime_table(
        arima_model,
        prophet_model,
        tslm_model
        )


# Calibrate and Metrics
forecast_table %>%
  modeltime_calibrate(ts_test) %>%
  modeltime_accuracy()
## # A tibble: 3 × 9
##   .model_id .model_desc             .type   mae  mape  mase smape  rmse   rsq
##       <int> <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         1 ARIMA(1,1,3) WITH DRIFT Test  1622.  218.  1.95  26.9 2432. 0.262
## 2         2 PROPHET                 Test   971.  153.  1.17  18.8 1357. 0.379
## 3         3 LM                      Test   980.  162.  1.18  19.0 1396. 0.338
# Calibrate, Forecast and Plot
forecast_table %>%
  modeltime_calibrate(ts_test) %>% 
  modeltime_forecast(actual_data = ts_test) %>% 
  plot_modeltime_forecast()
## Using '.calibration_data' to forecast.
# Calibrate, Forecast on new data, and Plot
forecast_table %>%
  modeltime_calibrate(ts_test) %>% 
  modeltime_forecast(new_data = ts_test,
                     actual_data = tsdata) %>% 
  plot_modeltime_forecast()

Final Forecasting

# Final fit to entire dataset
future_forecast <- forecast_table %>%
  modeltime_refit(tsdata) %>%
  modeltime_calibrate(tsdata) %>% 
  modeltime_forecast(
    h = "3 months",
    actual_data = tsdata
  )
## frequency = 7 observations per 1 week
future_forecast %>%
  plot_modeltime_forecast()