下載確診資料

confirmed_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file <- 'time_series_covid19_confirmed_global.csv'
download.file(confirmed_url, confirmed_file)

載入確診資料

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.2
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
confirmed <- read_csv("time_series_covid19_confirmed_global.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(confirmed)
## # A tibble: 6 x 256
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 249 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
col_names <- colnames(confirmed)
date_cols <- col_names[5:length(col_names)]

covid19_tidy_df <- confirmed %>%
  gather(Date, Case_Number, date_cols)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
head(covid19_tidy_df)
## # A tibble: 6 x 6
##   `Province/State` `Country/Region`      Lat   Long Date    Case_Number
##   <chr>            <chr>               <dbl>  <dbl> <chr>         <dbl>
## 1 <NA>             Afghanistan          33.9  67.7  1/22/20           0
## 2 <NA>             Albania              41.2  20.2  1/22/20           0
## 3 <NA>             Algeria              28.0   1.66 1/22/20           0
## 4 <NA>             Andorra              42.5   1.52 1/22/20           0
## 5 <NA>             Angola              -11.2  17.9  1/22/20           0
## 6 <NA>             Antigua and Barbuda  17.1 -61.8  1/22/20           0
covid19_tidy_df$Date <- as.Date(covid19_tidy_df$Date, format = '%m/%d/%y')

head(covid19_tidy_df)
## # A tibble: 6 x 6
##   `Province/State` `Country/Region`      Lat   Long Date       Case_Number
##   <chr>            <chr>               <dbl>  <dbl> <date>           <dbl>
## 1 <NA>             Afghanistan          33.9  67.7  2020-01-22           0
## 2 <NA>             Albania              41.2  20.2  2020-01-22           0
## 3 <NA>             Algeria              28.0   1.66 2020-01-22           0
## 4 <NA>             Andorra              42.5   1.52 2020-01-22           0
## 5 <NA>             Angola              -11.2  17.9  2020-01-22           0
## 6 <NA>             Antigua and Barbuda  17.1 -61.8  2020-01-22           0

彙整確診資料

time_series_covid19_confirmed_global  <- covid19_tidy_df %>%
  group_by(Date) %>%
  summarize(Case_Number = sum(Case_Number))
## `summarise()` ungrouping output (override with `.groups` argument)
head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 2
##   Date       Case_Number
##   <date>           <dbl>
## 1 2020-01-22         555
## 2 2020-01-23         654
## 3 2020-01-24         941
## 4 2020-01-25        1434
## 5 2020-01-26        2118
## 6 2020-01-27        2927
plot(Case_Number ~ Date, time_series_covid19_confirmed_global, type = 'l')

time_series_covid19_confirmed_global$Daily_Case <- c(0, diff(time_series_covid19_confirmed_global$Case_Number))


head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-01-22         555          0
## 2 2020-01-23         654         99
## 3 2020-01-24         941        287
## 4 2020-01-25        1434        493
## 5 2020-01-26        2118        684
## 6 2020-01-27        2927        809
plot(Daily_Case ~ Date, time_series_covid19_confirmed_global, type = 'l')

## 繪製時間序列

#install.packages('timetk')
library(timetk)
time_series_covid19_confirmed_global %>%
 plot_time_series(Date, Daily_Case, .interactive = FALSE)

time_series_covid19_confirmed_global %>%
 plot_time_series(Date, Daily_Case, .interactive = TRUE)

建立訓練與測試資料集

splits <- time_series_covid19_confirmed_global %>%
  time_series_split(assess = "5 days", cumulative = TRUE)
## Using date_var: Date
splits %>%
 tk_time_series_cv_plan() %>%
 plot_time_series_cv_plan(Date, Daily_Case, .interactive = FALSE)

## ARIMA

# install.package('tidymodels')
# install.packages('modeltime')
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.0.2
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## ── Attaching packages ──────────────────────────────────── tidymodels 0.1.1 ──
## ✓ broom     0.7.0      ✓ recipes   0.1.13
## ✓ dials     0.0.9      ✓ rsample   0.0.8 
## ✓ infer     0.5.3      ✓ tune      0.1.1 
## ✓ modeldata 0.0.2      ✓ workflows 0.2.0 
## ✓ parsnip   0.1.3      ✓ yardstick 0.0.7
## Warning: package 'broom' was built under R version 4.0.2
## Warning: package 'dials' was built under R version 4.0.2
## Warning: package 'infer' was built under R version 4.0.2
## Warning: package 'modeldata' was built under R version 4.0.2
## Warning: package 'parsnip' was built under R version 4.0.2
## Warning: package 'recipes' was built under R version 4.0.2
## Warning: package 'rsample' was built under R version 4.0.2
## Warning: package 'tune' was built under R version 4.0.2
## Warning: package 'workflows' was built under R version 4.0.2
## Warning: package 'yardstick' was built under R version 4.0.2
## ── Conflicts ─────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
library(modeltime)

model_fit_arima <- arima_reg() %>%
 set_engine("auto_arima") %>%
 fit(Daily_Case ~ Date, training(splits))
## frequency = 7 observations per 1 week
#?arima_reg

model_fit_arima
## parsnip model object
## 
## Fit time:  431ms 
## Series: outcome 
## ARIMA(1,0,1)(0,1,1)[7] with drift 
## 
## Coefficients:
##          ar1      ma1     sma1      drift
##       0.9645  -0.6646  -0.5164  1187.0008
## s.e.  0.0191   0.0475   0.0625   441.6182
## 
## sigma^2 estimated as 127258261:  log likelihood=-2579.14
## AIC=5168.28   AICc=5168.53   BIC=5185.68

Prophet

model_fit_prophet <- prophet_reg() %>%
 set_engine("prophet", yearly.seasonality = TRUE) %>%
 fit(Daily_Case ~ Date, training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

Machine Learning Model

head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-01-22         555          0
## 2 2020-01-23         654         99
## 3 2020-01-24         941        287
## 4 2020-01-25        1434        493
## 5 2020-01-26        2118        684
## 6 2020-01-27        2927        809
?recipe
?juice
?step_timeseries_signature
library(recipes)
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
 step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>% 
  step_fourier(Date, period = 365, K = 5)  %>%
  step_dummy(all_nominal())

df <-  recipe_spec %>% prep() %>% juice()

dim(df)
## [1] 247  44

ElasticNet

model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")

workflow_fit_glmnet <- workflow() %>%
 add_model(model_spec_glmnet) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

#workflow_fit_glmnet

RandomForest

model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
 set_engine("randomForest")

workflow_fit_rf <- workflow() %>%
 add_model(model_spec_rf) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

XGBoost

model_spec_prophet_boost <- prophet_boost() %>%
 set_engine("prophet_xgboost", yearly.seasonality = TRUE)

workflow_fit_prophet_boost <- workflow() %>%
 add_model(model_spec_prophet_boost) %>%
 add_recipe(recipe_spec) %>%
 fit(training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

Model Table

model_table <- modeltime_table(
 model_fit_arima,
 model_fit_prophet,
 workflow_fit_glmnet,
 workflow_fit_rf,
 workflow_fit_prophet_boost
)

model_table
## # Modeltime Table
## # A tibble: 5 x 3
##   .model_id .model     .model_desc                      
##       <int> <list>     <chr>                            
## 1         1 <fit[+]>   ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT
## 2         2 <fit[+]>   PROPHET                          
## 3         3 <workflow> GLMNET                           
## 4         4 <workflow> RANDOMFOREST                     
## 5         5 <workflow> PROPHET W/ XGBOOST ERRORS

Calibration

calibration_table <- model_table %>%
 modeltime_calibrate(testing(splits))

calibration_table
## # Modeltime Table
## # A tibble: 5 x 5
##   .model_id .model     .model_desc                       .type .calibration_data
##       <int> <list>     <chr>                             <chr> <list>           
## 1         1 <fit[+]>   ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT Test  <tibble [5 × 4]> 
## 2         2 <fit[+]>   PROPHET                           Test  <tibble [5 × 4]> 
## 3         3 <workflow> GLMNET                            Test  <tibble [5 × 4]> 
## 4         4 <workflow> RANDOMFOREST                      Test  <tibble [5 × 4]> 
## 5         5 <workflow> PROPHET W/ XGBOOST ERRORS         Test  <tibble [5 × 4]>

Forecasting

calibration_table %>%
 modeltime_forecast(actual_data = time_series_covid19_confirmed_global) %>%
 plot_modeltime_forecast(.interactive = TRUE)
## Using '.calibration_data' to forecast.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Evaluation

#head(time_series_covid19_confirmed_global)
calibration_table %>%
 modeltime_accuracy() %>%
 table_modeltime_accuracy(.interactive = FALSE)

Accuracy Table
.model_id .model_desc .type mae mape mase smape rmse rsq
1 ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT Test 30353.49 13.22 0.60 11.62 41686.11 0.58
2 PROPHET Test 35280.16 14.93 0.69 13.29 44837.95 0.33
3 GLMNET Test 50719.68 21.60 1.00 18.43 61426.80 0.05
4 RANDOMFOREST Test 29234.16 11.66 0.57 11.09 37537.14 0.28
5 PROPHET W/ XGBOOST ERRORS Test 62881.15 26.36 1.23 22.20 72017.32 0.59
## Forecast 3 Months

ret <- calibration_table %>%
 modeltime_refit(time_series_covid19_confirmed_global) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global) 
## frequency = 7 observations per 1 week
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
#ret %>% filter(.model_id == 2)

ret %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")
ret %>% 
 filter(.model_id %in% c(NA,5) ) %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")
tail(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-09-24    32227277     360934
## 2 2020-09-25    32562075     334798
## 3 2020-09-26    32840012     277937
## 4 2020-09-27    33077724     237712
## 5 2020-09-28    33353615     275891
## 6 2020-09-29    33561081     207466
# 33561081

ret %>% 
 filter(.model_id %in% c(5) ) %>%
  select(.index, .value) %>%
  mutate(total = cumsum(.value)) %>%
  mutate(predict_total = total +  33561081) %>%
  filter(predict_total >= 40000000)
## # A tibble: 68 x 4
##    .index      .value    total predict_total
##    <date>       <dbl>    <dbl>         <dbl>
##  1 2020-10-23 299040. 6643916.     40204997.
##  2 2020-10-24 290348. 6934263.     40495344.
##  3 2020-10-25 265987. 7200250.     40761331.
##  4 2020-10-26 287516. 7487766.     41048847.
##  5 2020-10-27 282705. 7770472.     41331553.
##  6 2020-10-28 299271. 8069742.     41630823.
##  7 2020-10-29 314961. 8384703.     41945784.
##  8 2020-10-30 318413. 8703116.     42264197.
##  9 2020-10-31 310782. 9013898.     42574979.
## 10 2020-11-01 280488. 9294386.     42855467.
## # … with 58 more rows
# model1 : 10/24
# model2 : 10/21
# model3 : 10/22
# model4 : 10/25
# model5 : 10/23


calibration_table %>%
 filter(.model_id == 1) %>%
 modeltime_refit(time_series_covid19_confirmed_global) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global) %>%
 plot_modeltime_forecast(.interactive = FALSE, .title = "ARIMA model forecast Plot", .y_lab = "New daily cases")
## frequency = 7 observations per 1 week
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## US

time_series_covid19_confirmed_US  <- covid19_tidy_df %>%
  filter(`Country/Region` == 'US') %>%
  filter(`Date` >= '2020-03-01') %>%
  group_by(Date) %>%
  summarize(Case_Number = sum(Case_Number))
## `summarise()` ungrouping output (override with `.groups` argument)
time_series_covid19_confirmed_US$Daily_Case <- c(0, diff(time_series_covid19_confirmed_US$Case_Number))


time_series_covid19_confirmed_US %>%
 plot_time_series(Date, Daily_Case, .interactive = TRUE)

Split Data

splits <- time_series_covid19_confirmed_US %>%
  time_series_split(assess = "30 days", cumulative = TRUE)
## Using date_var: Date
splits %>%
 tk_time_series_cv_plan() %>%
 plot_time_series_cv_plan(Date, Daily_Case, .interactive = TRUE)
## Preprocessing
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
 step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>% 
  step_fourier(Date, period = 365, K = 5)  %>%
  step_dummy(all_nominal())

df <-  recipe_spec %>% prep() %>% juice()
head(df)
## # A tibble: 6 x 44
##   Date       Daily_Case Date_half Date_quarter Date_month Date_day Date_wday
##   <date>          <dbl>     <int>        <int>      <int>    <int>     <int>
## 1 2020-03-01          0         1            1          3        1         1
## 2 2020-03-02         22         1            1          3        2         2
## 3 2020-03-03         20         1            1          3        3         3
## 4 2020-03-04         33         1            1          3        4         4
## 5 2020-03-05         77         1            1          3        5         5
## 6 2020-03-06         53         1            1          3        6         6
## # … with 37 more variables: Date_mday <int>, Date_qday <int>, Date_yday <int>,
## #   Date_mweek <int>, Date_week <int>, Date_week.iso <int>, Date_week2 <int>,
## #   Date_week3 <int>, Date_week4 <int>, Date_mday7 <int>, Date_sin365_K1 <dbl>,
## #   Date_cos365_K1 <dbl>, Date_sin365_K2 <dbl>, Date_cos365_K2 <dbl>,
## #   Date_sin365_K3 <dbl>, Date_cos365_K3 <dbl>, Date_sin365_K4 <dbl>,
## #   Date_cos365_K4 <dbl>, Date_sin365_K5 <dbl>, Date_cos365_K5 <dbl>,
## #   Date_month.lbl_01 <dbl>, Date_month.lbl_02 <dbl>, Date_month.lbl_03 <dbl>,
## #   Date_month.lbl_04 <dbl>, Date_month.lbl_05 <dbl>, Date_month.lbl_06 <dbl>,
## #   Date_month.lbl_07 <dbl>, Date_month.lbl_08 <dbl>, Date_month.lbl_09 <dbl>,
## #   Date_month.lbl_10 <dbl>, Date_month.lbl_11 <dbl>, Date_wday.lbl_1 <dbl>,
## #   Date_wday.lbl_2 <dbl>, Date_wday.lbl_3 <dbl>, Date_wday.lbl_4 <dbl>,
## #   Date_wday.lbl_5 <dbl>, Date_wday.lbl_6 <dbl>

Training Model

model_fit_arima <- arima_reg() %>%
 set_engine("auto_arima") %>%
 fit(Daily_Case ~ Date, training(splits))
## frequency = 7 observations per 1 week
model_fit_prophet <- prophet_reg() %>%
 set_engine("prophet", yearly.seasonality = TRUE) %>%
 fit(Daily_Case ~ Date, training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")

workflow_fit_glmnet <- workflow() %>%
 add_model(model_spec_glmnet) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
 set_engine("randomForest")

workflow_fit_rf <- workflow() %>%
 add_model(model_spec_rf) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))


model_spec_prophet_boost <- prophet_boost() %>%
 set_engine("prophet_xgboost", yearly.seasonality = TRUE)

workflow_fit_prophet_boost <- workflow() %>%
 add_model(model_spec_prophet_boost) %>%
 add_recipe(recipe_spec) %>%
 fit(training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_table <- modeltime_table(
 model_fit_arima,
 model_fit_prophet,
 workflow_fit_glmnet,
 workflow_fit_rf,
 workflow_fit_prophet_boost
)

Evaluation

calibration_table <- model_table %>%
 modeltime_calibrate(testing(splits))

calibration_table %>%
 modeltime_accuracy() %>%
 table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table
.model_id .model_desc .type mae mape mase smape rmse rsq
1 ARIMA(1,1,0)(0,1,1)[7] Test 5567.94 14.46 0.83 14.71 7221.18 0.17
2 PROPHET Test 23099.14 62.04 3.45 45.90 23763.13 0.39
3 GLMNET Test 17968.62 46.96 2.69 35.41 21541.65 0.12
4 RANDOMFOREST Test 8612.36 24.92 1.29 20.77 10292.23 0.10
5 PROPHET W/ XGBOOST ERRORS Test 14204.31 37.89 2.12 29.70 17353.53 0.16

Forecasting

calibration_table %>%
 modeltime_forecast(actual_data = time_series_covid19_confirmed_US) %>%
 plot_modeltime_forecast(.interactive = TRUE)
## Using '.calibration_data' to forecast.

## 3 Months Prediction

ret <- calibration_table %>%
 modeltime_refit(time_series_covid19_confirmed_US) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_US) 
## frequency = 7 observations per 1 week
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
ret %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")