confirmed_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file <- 'time_series_covid19_confirmed_global.csv'
download.file(confirmed_url, confirmed_file)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.2
## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
confirmed <- read_csv("time_series_covid19_confirmed_global.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Province/State` = col_character(),
## `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(confirmed)
## # A tibble: 6 x 256
## `Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 0 0 0
## 2 <NA> Albania 41.2 20.2 0 0 0
## 3 <NA> Algeria 28.0 1.66 0 0 0
## 4 <NA> Andorra 42.5 1.52 0 0 0
## 5 <NA> Angola -11.2 17.9 0 0 0
## 6 <NA> Antigua and Bar… 17.1 -61.8 0 0 0
## # … with 249 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## # `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## # `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## # `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## # `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## # `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## # `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## # `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## # `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## # `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## # `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## # `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## # `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## # `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## # `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## # `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## # `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## # `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## # `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## # `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## # `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## # `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## # `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## # `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## # `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## # `5/3/20` <dbl>, …
col_names <- colnames(confirmed)
date_cols <- col_names[5:length(col_names)]
covid19_tidy_df <- confirmed %>%
gather(Date, Case_Number, date_cols)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
head(covid19_tidy_df)
## # A tibble: 6 x 6
## `Province/State` `Country/Region` Lat Long Date Case_Number
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 1/22/20 0
## 2 <NA> Albania 41.2 20.2 1/22/20 0
## 3 <NA> Algeria 28.0 1.66 1/22/20 0
## 4 <NA> Andorra 42.5 1.52 1/22/20 0
## 5 <NA> Angola -11.2 17.9 1/22/20 0
## 6 <NA> Antigua and Barbuda 17.1 -61.8 1/22/20 0
covid19_tidy_df$Date <- as.Date(covid19_tidy_df$Date, format = '%m/%d/%y')
head(covid19_tidy_df)
## # A tibble: 6 x 6
## `Province/State` `Country/Region` Lat Long Date Case_Number
## <chr> <chr> <dbl> <dbl> <date> <dbl>
## 1 <NA> Afghanistan 33.9 67.7 2020-01-22 0
## 2 <NA> Albania 41.2 20.2 2020-01-22 0
## 3 <NA> Algeria 28.0 1.66 2020-01-22 0
## 4 <NA> Andorra 42.5 1.52 2020-01-22 0
## 5 <NA> Angola -11.2 17.9 2020-01-22 0
## 6 <NA> Antigua and Barbuda 17.1 -61.8 2020-01-22 0
time_series_covid19_confirmed_global <- covid19_tidy_df %>%
group_by(Date) %>%
summarize(Case_Number = sum(Case_Number))
## `summarise()` ungrouping output (override with `.groups` argument)
head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 2
## Date Case_Number
## <date> <dbl>
## 1 2020-01-22 555
## 2 2020-01-23 654
## 3 2020-01-24 941
## 4 2020-01-25 1434
## 5 2020-01-26 2118
## 6 2020-01-27 2927
plot(Case_Number ~ Date, time_series_covid19_confirmed_global, type = 'l')
time_series_covid19_confirmed_global$Daily_Case <- c(0, diff(time_series_covid19_confirmed_global$Case_Number))
head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
## Date Case_Number Daily_Case
## <date> <dbl> <dbl>
## 1 2020-01-22 555 0
## 2 2020-01-23 654 99
## 3 2020-01-24 941 287
## 4 2020-01-25 1434 493
## 5 2020-01-26 2118 684
## 6 2020-01-27 2927 809
plot(Daily_Case ~ Date, time_series_covid19_confirmed_global, type = 'l')
## 繪製時間序列
#install.packages('timetk')
library(timetk)
time_series_covid19_confirmed_global %>%
plot_time_series(Date, Daily_Case, .interactive = FALSE)
time_series_covid19_confirmed_global %>%
plot_time_series(Date, Daily_Case, .interactive = TRUE)
splits <- time_series_covid19_confirmed_global %>%
time_series_split(assess = "5 days", cumulative = TRUE)
## Using date_var: Date
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(Date, Daily_Case, .interactive = FALSE)
## ARIMA
# install.package('tidymodels')
# install.packages('modeltime')
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.0.2
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## ── Attaching packages ──────────────────────────────────── tidymodels 0.1.1 ──
## ✓ broom 0.7.0 ✓ recipes 0.1.13
## ✓ dials 0.0.9 ✓ rsample 0.0.8
## ✓ infer 0.5.3 ✓ tune 0.1.1
## ✓ modeldata 0.0.2 ✓ workflows 0.2.0
## ✓ parsnip 0.1.3 ✓ yardstick 0.0.7
## Warning: package 'broom' was built under R version 4.0.2
## Warning: package 'dials' was built under R version 4.0.2
## Warning: package 'infer' was built under R version 4.0.2
## Warning: package 'modeldata' was built under R version 4.0.2
## Warning: package 'parsnip' was built under R version 4.0.2
## Warning: package 'recipes' was built under R version 4.0.2
## Warning: package 'rsample' was built under R version 4.0.2
## Warning: package 'tune' was built under R version 4.0.2
## Warning: package 'workflows' was built under R version 4.0.2
## Warning: package 'yardstick' was built under R version 4.0.2
## ── Conflicts ─────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
library(modeltime)
model_fit_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(Daily_Case ~ Date, training(splits))
## frequency = 7 observations per 1 week
#?arima_reg
model_fit_arima
## parsnip model object
##
## Fit time: 431ms
## Series: outcome
## ARIMA(1,0,1)(0,1,1)[7] with drift
##
## Coefficients:
## ar1 ma1 sma1 drift
## 0.9645 -0.6646 -0.5164 1187.0008
## s.e. 0.0191 0.0475 0.0625 441.6182
##
## sigma^2 estimated as 127258261: log likelihood=-2579.14
## AIC=5168.28 AICc=5168.53 BIC=5185.68
model_fit_prophet <- prophet_reg() %>%
set_engine("prophet", yearly.seasonality = TRUE) %>%
fit(Daily_Case ~ Date, training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
head(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
## Date Case_Number Daily_Case
## <date> <dbl> <dbl>
## 1 2020-01-22 555 0
## 2 2020-01-23 654 99
## 3 2020-01-24 941 287
## 4 2020-01-25 1434 493
## 5 2020-01-26 2118 684
## 6 2020-01-27 2927 809
?recipe
?juice
?step_timeseries_signature
library(recipes)
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>%
step_fourier(Date, period = 365, K = 5) %>%
step_dummy(all_nominal())
df <- recipe_spec %>% prep() %>% juice()
dim(df)
## [1] 247 44
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")
workflow_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
#workflow_fit_glmnet
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
set_engine("randomForest")
workflow_fit_rf <- workflow() %>%
add_model(model_spec_rf) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
model_spec_prophet_boost <- prophet_boost() %>%
set_engine("prophet_xgboost", yearly.seasonality = TRUE)
workflow_fit_prophet_boost <- workflow() %>%
add_model(model_spec_prophet_boost) %>%
add_recipe(recipe_spec) %>%
fit(training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_table <- modeltime_table(
model_fit_arima,
model_fit_prophet,
workflow_fit_glmnet,
workflow_fit_rf,
workflow_fit_prophet_boost
)
model_table
## # Modeltime Table
## # A tibble: 5 x 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT
## 2 2 <fit[+]> PROPHET
## 3 3 <workflow> GLMNET
## 4 4 <workflow> RANDOMFOREST
## 5 5 <workflow> PROPHET W/ XGBOOST ERRORS
calibration_table <- model_table %>%
modeltime_calibrate(testing(splits))
calibration_table
## # Modeltime Table
## # A tibble: 5 x 5
## .model_id .model .model_desc .type .calibration_data
## <int> <list> <chr> <chr> <list>
## 1 1 <fit[+]> ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT Test <tibble [5 × 4]>
## 2 2 <fit[+]> PROPHET Test <tibble [5 × 4]>
## 3 3 <workflow> GLMNET Test <tibble [5 × 4]>
## 4 4 <workflow> RANDOMFOREST Test <tibble [5 × 4]>
## 5 5 <workflow> PROPHET W/ XGBOOST ERRORS Test <tibble [5 × 4]>
calibration_table %>%
modeltime_forecast(actual_data = time_series_covid19_confirmed_global) %>%
plot_modeltime_forecast(.interactive = TRUE)
## Using '.calibration_data' to forecast.
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Evaluation
#head(time_series_covid19_confirmed_global)
calibration_table %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table | ||||||||
---|---|---|---|---|---|---|---|---|
.model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
1 | ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT | Test | 30353.49 | 13.22 | 0.60 | 11.62 | 41686.11 | 0.58 |
2 | PROPHET | Test | 35280.16 | 14.93 | 0.69 | 13.29 | 44837.95 | 0.33 |
3 | GLMNET | Test | 50719.68 | 21.60 | 1.00 | 18.43 | 61426.80 | 0.05 |
4 | RANDOMFOREST | Test | 29234.16 | 11.66 | 0.57 | 11.09 | 37537.14 | 0.28 |
5 | PROPHET W/ XGBOOST ERRORS | Test | 62881.15 | 26.36 | 1.23 | 22.20 | 72017.32 | 0.59 |
ret <- calibration_table %>%
modeltime_refit(time_series_covid19_confirmed_global) %>%
modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global)
## frequency = 7 observations per 1 week
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
#ret %>% filter(.model_id == 2)
ret %>%
plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")
ret %>%
filter(.model_id %in% c(NA,5) ) %>%
plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")
tail(time_series_covid19_confirmed_global)
## # A tibble: 6 x 3
## Date Case_Number Daily_Case
## <date> <dbl> <dbl>
## 1 2020-09-24 32227277 360934
## 2 2020-09-25 32562075 334798
## 3 2020-09-26 32840012 277937
## 4 2020-09-27 33077724 237712
## 5 2020-09-28 33353615 275891
## 6 2020-09-29 33561081 207466
# 33561081
ret %>%
filter(.model_id %in% c(5) ) %>%
select(.index, .value) %>%
mutate(total = cumsum(.value)) %>%
mutate(predict_total = total + 33561081) %>%
filter(predict_total >= 40000000)
## # A tibble: 68 x 4
## .index .value total predict_total
## <date> <dbl> <dbl> <dbl>
## 1 2020-10-23 299040. 6643916. 40204997.
## 2 2020-10-24 290348. 6934263. 40495344.
## 3 2020-10-25 265987. 7200250. 40761331.
## 4 2020-10-26 287516. 7487766. 41048847.
## 5 2020-10-27 282705. 7770472. 41331553.
## 6 2020-10-28 299271. 8069742. 41630823.
## 7 2020-10-29 314961. 8384703. 41945784.
## 8 2020-10-30 318413. 8703116. 42264197.
## 9 2020-10-31 310782. 9013898. 42574979.
## 10 2020-11-01 280488. 9294386. 42855467.
## # … with 58 more rows
# model1 : 10/24
# model2 : 10/21
# model3 : 10/22
# model4 : 10/25
# model5 : 10/23
calibration_table %>%
filter(.model_id == 1) %>%
modeltime_refit(time_series_covid19_confirmed_global) %>%
modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global) %>%
plot_modeltime_forecast(.interactive = FALSE, .title = "ARIMA model forecast Plot", .y_lab = "New daily cases")
## frequency = 7 observations per 1 week
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf
## US
time_series_covid19_confirmed_US <- covid19_tidy_df %>%
filter(`Country/Region` == 'US') %>%
filter(`Date` >= '2020-03-01') %>%
group_by(Date) %>%
summarize(Case_Number = sum(Case_Number))
## `summarise()` ungrouping output (override with `.groups` argument)
time_series_covid19_confirmed_US$Daily_Case <- c(0, diff(time_series_covid19_confirmed_US$Case_Number))
time_series_covid19_confirmed_US %>%
plot_time_series(Date, Daily_Case, .interactive = TRUE)
splits <- time_series_covid19_confirmed_US %>%
time_series_split(assess = "30 days", cumulative = TRUE)
## Using date_var: Date
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(Date, Daily_Case, .interactive = TRUE)
## Preprocessing
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>%
step_fourier(Date, period = 365, K = 5) %>%
step_dummy(all_nominal())
df <- recipe_spec %>% prep() %>% juice()
head(df)
## # A tibble: 6 x 44
## Date Daily_Case Date_half Date_quarter Date_month Date_day Date_wday
## <date> <dbl> <int> <int> <int> <int> <int>
## 1 2020-03-01 0 1 1 3 1 1
## 2 2020-03-02 22 1 1 3 2 2
## 3 2020-03-03 20 1 1 3 3 3
## 4 2020-03-04 33 1 1 3 4 4
## 5 2020-03-05 77 1 1 3 5 5
## 6 2020-03-06 53 1 1 3 6 6
## # … with 37 more variables: Date_mday <int>, Date_qday <int>, Date_yday <int>,
## # Date_mweek <int>, Date_week <int>, Date_week.iso <int>, Date_week2 <int>,
## # Date_week3 <int>, Date_week4 <int>, Date_mday7 <int>, Date_sin365_K1 <dbl>,
## # Date_cos365_K1 <dbl>, Date_sin365_K2 <dbl>, Date_cos365_K2 <dbl>,
## # Date_sin365_K3 <dbl>, Date_cos365_K3 <dbl>, Date_sin365_K4 <dbl>,
## # Date_cos365_K4 <dbl>, Date_sin365_K5 <dbl>, Date_cos365_K5 <dbl>,
## # Date_month.lbl_01 <dbl>, Date_month.lbl_02 <dbl>, Date_month.lbl_03 <dbl>,
## # Date_month.lbl_04 <dbl>, Date_month.lbl_05 <dbl>, Date_month.lbl_06 <dbl>,
## # Date_month.lbl_07 <dbl>, Date_month.lbl_08 <dbl>, Date_month.lbl_09 <dbl>,
## # Date_month.lbl_10 <dbl>, Date_month.lbl_11 <dbl>, Date_wday.lbl_1 <dbl>,
## # Date_wday.lbl_2 <dbl>, Date_wday.lbl_3 <dbl>, Date_wday.lbl_4 <dbl>,
## # Date_wday.lbl_5 <dbl>, Date_wday.lbl_6 <dbl>
model_fit_arima <- arima_reg() %>%
set_engine("auto_arima") %>%
fit(Daily_Case ~ Date, training(splits))
## frequency = 7 observations per 1 week
model_fit_prophet <- prophet_reg() %>%
set_engine("prophet", yearly.seasonality = TRUE) %>%
fit(Daily_Case ~ Date, training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")
workflow_fit_glmnet <- workflow() %>%
add_model(model_spec_glmnet) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
set_engine("randomForest")
workflow_fit_rf <- workflow() %>%
add_model(model_spec_rf) %>%
add_recipe(recipe_spec %>% step_rm(Date)) %>%
fit(training(splits))
model_spec_prophet_boost <- prophet_boost() %>%
set_engine("prophet_xgboost", yearly.seasonality = TRUE)
workflow_fit_prophet_boost <- workflow() %>%
add_model(model_spec_prophet_boost) %>%
add_recipe(recipe_spec) %>%
fit(training(splits))
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_table <- modeltime_table(
model_fit_arima,
model_fit_prophet,
workflow_fit_glmnet,
workflow_fit_rf,
workflow_fit_prophet_boost
)
calibration_table <- model_table %>%
modeltime_calibrate(testing(splits))
calibration_table %>%
modeltime_accuracy() %>%
table_modeltime_accuracy(.interactive = FALSE)
Accuracy Table | ||||||||
---|---|---|---|---|---|---|---|---|
.model_id | .model_desc | .type | mae | mape | mase | smape | rmse | rsq |
1 | ARIMA(1,1,0)(0,1,1)[7] | Test | 5567.94 | 14.46 | 0.83 | 14.71 | 7221.18 | 0.17 |
2 | PROPHET | Test | 23099.14 | 62.04 | 3.45 | 45.90 | 23763.13 | 0.39 |
3 | GLMNET | Test | 17968.62 | 46.96 | 2.69 | 35.41 | 21541.65 | 0.12 |
4 | RANDOMFOREST | Test | 8612.36 | 24.92 | 1.29 | 20.77 | 10292.23 | 0.10 |
5 | PROPHET W/ XGBOOST ERRORS | Test | 14204.31 | 37.89 | 2.12 | 29.70 | 17353.53 | 0.16 |
calibration_table %>%
modeltime_forecast(actual_data = time_series_covid19_confirmed_US) %>%
plot_modeltime_forecast(.interactive = TRUE)
## Using '.calibration_data' to forecast.
## 3 Months Prediction
ret <- calibration_table %>%
modeltime_refit(time_series_covid19_confirmed_US) %>%
modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_US)
## frequency = 7 observations per 1 week
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.
## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
ret %>%
plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")