Demo20200930

下載確診資料

confirmed_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
confirmed_file <- 'time_series_covid19_confirmed_global.csv'
download.file(confirmed_url, confirmed_file)

載入確診資料

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.2

## ── Attaching packages ───────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## Warning: package 'tibble' was built under R version 4.0.2

## ── Conflicts ──────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

confirmed <- read_csv("time_series_covid19_confirmed_global.csv")

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )

## See spec(...) for full column specifications.

head(confirmed)

## # A tibble: 6 x 256
##   `Province/State` `Country/Region`   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>            <chr>            <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>             Afghanistan       33.9  67.7          0         0         0
## 2 <NA>             Albania           41.2  20.2          0         0         0
## 3 <NA>             Algeria           28.0   1.66         0         0         0
## 4 <NA>             Andorra           42.5   1.52         0         0         0
## 5 <NA>             Angola           -11.2  17.9          0         0         0
## 6 <NA>             Antigua and Bar…  17.1 -61.8          0         0         0
## # … with 249 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …

col_names <- colnames(confirmed)
date_cols <- col_names[5:length(col_names)]

covid19_tidy_df <- confirmed %>%
  gather(Date, Case_Number, date_cols)

## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

head(covid19_tidy_df)

## # A tibble: 6 x 6
##   `Province/State` `Country/Region`      Lat   Long Date    Case_Number
##   <chr>            <chr>               <dbl>  <dbl> <chr>         <dbl>
## 1 <NA>             Afghanistan          33.9  67.7  1/22/20           0
## 2 <NA>             Albania              41.2  20.2  1/22/20           0
## 3 <NA>             Algeria              28.0   1.66 1/22/20           0
## 4 <NA>             Andorra              42.5   1.52 1/22/20           0
## 5 <NA>             Angola              -11.2  17.9  1/22/20           0
## 6 <NA>             Antigua and Barbuda  17.1 -61.8  1/22/20           0

covid19_tidy_df$Date <- as.Date(covid19_tidy_df$Date, format = '%m/%d/%y')

head(covid19_tidy_df)

## # A tibble: 6 x 6
##   `Province/State` `Country/Region`      Lat   Long Date       Case_Number
##   <chr>            <chr>               <dbl>  <dbl> <date>           <dbl>
## 1 <NA>             Afghanistan          33.9  67.7  2020-01-22           0
## 2 <NA>             Albania              41.2  20.2  2020-01-22           0
## 3 <NA>             Algeria              28.0   1.66 2020-01-22           0
## 4 <NA>             Andorra              42.5   1.52 2020-01-22           0
## 5 <NA>             Angola              -11.2  17.9  2020-01-22           0
## 6 <NA>             Antigua and Barbuda  17.1 -61.8  2020-01-22           0

彙整確診資料

time_series_covid19_confirmed_global  <- covid19_tidy_df %>%
  group_by(Date) %>%
  summarize(Case_Number = sum(Case_Number))

## `summarise()` ungrouping output (override with `.groups` argument)

head(time_series_covid19_confirmed_global)

## # A tibble: 6 x 2
##   Date       Case_Number
##   <date>           <dbl>
## 1 2020-01-22         555
## 2 2020-01-23         654
## 3 2020-01-24         941
## 4 2020-01-25        1434
## 5 2020-01-26        2118
## 6 2020-01-27        2927

plot(Case_Number ~ Date, time_series_covid19_confirmed_global, type = 'l')

time_series_covid19_confirmed_global$Daily_Case <- c(0, diff(time_series_covid19_confirmed_global$Case_Number))


head(time_series_covid19_confirmed_global)

## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-01-22         555          0
## 2 2020-01-23         654         99
## 3 2020-01-24         941        287
## 4 2020-01-25        1434        493
## 5 2020-01-26        2118        684
## 6 2020-01-27        2927        809

plot(Daily_Case ~ Date, time_series_covid19_confirmed_global, type = 'l')

## 繪製時間序列

#install.packages('timetk')
library(timetk)
time_series_covid19_confirmed_global %>%
 plot_time_series(Date, Daily_Case, .interactive = FALSE)

time_series_covid19_confirmed_global %>%
 plot_time_series(Date, Daily_Case, .interactive = TRUE)

建立訓練與測試資料集

splits <- time_series_covid19_confirmed_global %>%
  time_series_split(assess = "5 days", cumulative = TRUE)

## Using date_var: Date

splits %>%
 tk_time_series_cv_plan() %>%
 plot_time_series_cv_plan(Date, Daily_Case, .interactive = FALSE)

## ARIMA

AR: Autoregression (p)
I: Integrated (d)
MA: Moving Average (q)

# install.package('tidymodels')
# install.packages('modeltime')
library(tidymodels)

## Warning: package 'tidymodels' was built under R version 4.0.2

## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found
## Error in get(genname, envir = envir) : object 'required_pkgs' not found

## ── Attaching packages ──────────────────────────────────── tidymodels 0.1.1 ──

## ✓ broom     0.7.0      ✓ recipes   0.1.13
## ✓ dials     0.0.9      ✓ rsample   0.0.8 
## ✓ infer     0.5.3      ✓ tune      0.1.1 
## ✓ modeldata 0.0.2      ✓ workflows 0.2.0 
## ✓ parsnip   0.1.3      ✓ yardstick 0.0.7

## Warning: package 'broom' was built under R version 4.0.2

## Warning: package 'dials' was built under R version 4.0.2

## Warning: package 'infer' was built under R version 4.0.2

## Warning: package 'modeldata' was built under R version 4.0.2

## Warning: package 'parsnip' was built under R version 4.0.2

## Warning: package 'recipes' was built under R version 4.0.2

## Warning: package 'rsample' was built under R version 4.0.2

## Warning: package 'tune' was built under R version 4.0.2

## Warning: package 'workflows' was built under R version 4.0.2

## Warning: package 'yardstick' was built under R version 4.0.2

## ── Conflicts ─────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()

library(modeltime)

model_fit_arima <- arima_reg() %>%
 set_engine("auto_arima") %>%
 fit(Daily_Case ~ Date, training(splits))

## frequency = 7 observations per 1 week

#?arima_reg

model_fit_arima

## parsnip model object
## 
## Fit time:  431ms 
## Series: outcome 
## ARIMA(1,0,1)(0,1,1)[7] with drift 
## 
## Coefficients:
##          ar1      ma1     sma1      drift
##       0.9645  -0.6646  -0.5164  1187.0008
## s.e.  0.0191   0.0475   0.0625   441.6182
## 
## sigma^2 estimated as 127258261:  log likelihood=-2579.14
## AIC=5168.28   AICc=5168.53   BIC=5185.68

Prophet

model_fit_prophet <- prophet_reg() %>%
 set_engine("prophet", yearly.seasonality = TRUE) %>%
 fit(Daily_Case ~ Date, training(splits))

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

Machine Learning Model

head(time_series_covid19_confirmed_global)

## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-01-22         555          0
## 2 2020-01-23         654         99
## 3 2020-01-24         941        287
## 4 2020-01-25        1434        493
## 5 2020-01-26        2118        684
## 6 2020-01-27        2927        809

?recipe
?juice
?step_timeseries_signature
library(recipes)
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
 step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>% 
  step_fourier(Date, period = 365, K = 5)  %>%
  step_dummy(all_nominal())

df <-  recipe_spec %>% prep() %>% juice()

dim(df)

## [1] 247  44

ElasticNet

model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")

workflow_fit_glmnet <- workflow() %>%
 add_model(model_spec_glmnet) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

#workflow_fit_glmnet

RandomForest

model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
 set_engine("randomForest")

workflow_fit_rf <- workflow() %>%
 add_model(model_spec_rf) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

XGBoost

model_spec_prophet_boost <- prophet_boost() %>%
 set_engine("prophet_xgboost", yearly.seasonality = TRUE)

workflow_fit_prophet_boost <- workflow() %>%
 add_model(model_spec_prophet_boost) %>%
 add_recipe(recipe_spec) %>%
 fit(training(splits))

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

Model Table

model_table <- modeltime_table(
 model_fit_arima,
 model_fit_prophet,
 workflow_fit_glmnet,
 workflow_fit_rf,
 workflow_fit_prophet_boost
)

model_table

## # Modeltime Table
## # A tibble: 5 x 3
##   .model_id .model     .model_desc                      
##       <int> <list>     <chr>                            
## 1         1 <fit[+]>   ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT
## 2         2 <fit[+]>   PROPHET                          
## 3         3 <workflow> GLMNET                           
## 4         4 <workflow> RANDOMFOREST                     
## 5         5 <workflow> PROPHET W/ XGBOOST ERRORS

Calibration

calibration_table <- model_table %>%
 modeltime_calibrate(testing(splits))

calibration_table

## # Modeltime Table
## # A tibble: 5 x 5
##   .model_id .model     .model_desc                       .type .calibration_data
##       <int> <list>     <chr>                             <chr> <list>           
## 1         1 <fit[+]>   ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT Test  <tibble [5 × 4]> 
## 2         2 <fit[+]>   PROPHET                           Test  <tibble [5 × 4]> 
## 3         3 <workflow> GLMNET                            Test  <tibble [5 × 4]> 
## 4         4 <workflow> RANDOMFOREST                      Test  <tibble [5 × 4]> 
## 5         5 <workflow> PROPHET W/ XGBOOST ERRORS         Test  <tibble [5 × 4]>

Forecasting

calibration_table %>%
 modeltime_forecast(actual_data = time_series_covid19_confirmed_global) %>%
 plot_modeltime_forecast(.interactive = TRUE)

## Using '.calibration_data' to forecast.

## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Evaluation

#head(time_series_covid19_confirmed_global)
calibration_table %>%
 modeltime_accuracy() %>%
 table_modeltime_accuracy(.interactive = FALSE)

Accuracy Table

.model_id	.model_desc	.type	mae	mape	mase	smape	rmse	rsq
1	ARIMA(1,0,1)(0,1,1)[7] WITH DRIFT	Test	30353.49	13.22	0.60	11.62	41686.11	0.58
2	PROPHET	Test	35280.16	14.93	0.69	13.29	44837.95	0.33
3	GLMNET	Test	50719.68	21.60	1.00	18.43	61426.80	0.05
4	RANDOMFOREST	Test	29234.16	11.66	0.57	11.09	37537.14	0.28
5	PROPHET W/ XGBOOST ERRORS	Test	62881.15	26.36	1.23	22.20	72017.32	0.59

## Forecast 3 Months

ret <- calibration_table %>%
 modeltime_refit(time_series_covid19_confirmed_global) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global)

## frequency = 7 observations per 1 week

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

#ret %>% filter(.model_id == 2)

ret %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")

ret %>% 
 filter(.model_id %in% c(NA,5) ) %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")

tail(time_series_covid19_confirmed_global)

## # A tibble: 6 x 3
##   Date       Case_Number Daily_Case
##   <date>           <dbl>      <dbl>
## 1 2020-09-24    32227277     360934
## 2 2020-09-25    32562075     334798
## 3 2020-09-26    32840012     277937
## 4 2020-09-27    33077724     237712
## 5 2020-09-28    33353615     275891
## 6 2020-09-29    33561081     207466

# 33561081

ret %>% 
 filter(.model_id %in% c(5) ) %>%
  select(.index, .value) %>%
  mutate(total = cumsum(.value)) %>%
  mutate(predict_total = total +  33561081) %>%
  filter(predict_total >= 40000000)

## # A tibble: 68 x 4
##    .index      .value    total predict_total
##    <date>       <dbl>    <dbl>         <dbl>
##  1 2020-10-23 299040. 6643916.     40204997.
##  2 2020-10-24 290348. 6934263.     40495344.
##  3 2020-10-25 265987. 7200250.     40761331.
##  4 2020-10-26 287516. 7487766.     41048847.
##  5 2020-10-27 282705. 7770472.     41331553.
##  6 2020-10-28 299271. 8069742.     41630823.
##  7 2020-10-29 314961. 8384703.     41945784.
##  8 2020-10-30 318413. 8703116.     42264197.
##  9 2020-10-31 310782. 9013898.     42574979.
## 10 2020-11-01 280488. 9294386.     42855467.
## # … with 58 more rows

# model1 : 10/24
# model2 : 10/21
# model3 : 10/22
# model4 : 10/25
# model5 : 10/23


calibration_table %>%
 filter(.model_id == 1) %>%
 modeltime_refit(time_series_covid19_confirmed_global) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_global) %>%
 plot_modeltime_forecast(.interactive = FALSE, .title = "ARIMA model forecast Plot", .y_lab = "New daily cases")

## frequency = 7 observations per 1 week

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## US

time_series_covid19_confirmed_US  <- covid19_tidy_df %>%
  filter(`Country/Region` == 'US') %>%
  filter(`Date` >= '2020-03-01') %>%
  group_by(Date) %>%
  summarize(Case_Number = sum(Case_Number))

## `summarise()` ungrouping output (override with `.groups` argument)

time_series_covid19_confirmed_US$Daily_Case <- c(0, diff(time_series_covid19_confirmed_US$Case_Number))


time_series_covid19_confirmed_US %>%
 plot_time_series(Date, Daily_Case, .interactive = TRUE)

Split Data

splits <- time_series_covid19_confirmed_US %>%
  time_series_split(assess = "30 days", cumulative = TRUE)

## Using date_var: Date

splits %>%
 tk_time_series_cv_plan() %>%
 plot_time_series_cv_plan(Date, Daily_Case, .interactive = TRUE)

## Preprocessing
recipe_spec <- recipe(Daily_Case ~ Date, training(splits)) %>%
step_timeseries_signature(Date) %>%
 step_rm(contains("am.pm"), contains("year"), contains("hour"), contains("minute"), contains("second"), contains("xts"), 'Date_index.num') %>% 
  step_fourier(Date, period = 365, K = 5)  %>%
  step_dummy(all_nominal())

df <-  recipe_spec %>% prep() %>% juice()
head(df)

## # A tibble: 6 x 44
##   Date       Daily_Case Date_half Date_quarter Date_month Date_day Date_wday
##   <date>          <dbl>     <int>        <int>      <int>    <int>     <int>
## 1 2020-03-01          0         1            1          3        1         1
## 2 2020-03-02         22         1            1          3        2         2
## 3 2020-03-03         20         1            1          3        3         3
## 4 2020-03-04         33         1            1          3        4         4
## 5 2020-03-05         77         1            1          3        5         5
## 6 2020-03-06         53         1            1          3        6         6
## # … with 37 more variables: Date_mday <int>, Date_qday <int>, Date_yday <int>,
## #   Date_mweek <int>, Date_week <int>, Date_week.iso <int>, Date_week2 <int>,
## #   Date_week3 <int>, Date_week4 <int>, Date_mday7 <int>, Date_sin365_K1 <dbl>,
## #   Date_cos365_K1 <dbl>, Date_sin365_K2 <dbl>, Date_cos365_K2 <dbl>,
## #   Date_sin365_K3 <dbl>, Date_cos365_K3 <dbl>, Date_sin365_K4 <dbl>,
## #   Date_cos365_K4 <dbl>, Date_sin365_K5 <dbl>, Date_cos365_K5 <dbl>,
## #   Date_month.lbl_01 <dbl>, Date_month.lbl_02 <dbl>, Date_month.lbl_03 <dbl>,
## #   Date_month.lbl_04 <dbl>, Date_month.lbl_05 <dbl>, Date_month.lbl_06 <dbl>,
## #   Date_month.lbl_07 <dbl>, Date_month.lbl_08 <dbl>, Date_month.lbl_09 <dbl>,
## #   Date_month.lbl_10 <dbl>, Date_month.lbl_11 <dbl>, Date_wday.lbl_1 <dbl>,
## #   Date_wday.lbl_2 <dbl>, Date_wday.lbl_3 <dbl>, Date_wday.lbl_4 <dbl>,
## #   Date_wday.lbl_5 <dbl>, Date_wday.lbl_6 <dbl>

Training Model

model_fit_arima <- arima_reg() %>%
 set_engine("auto_arima") %>%
 fit(Daily_Case ~ Date, training(splits))

## frequency = 7 observations per 1 week

model_fit_prophet <- prophet_reg() %>%
 set_engine("prophet", yearly.seasonality = TRUE) %>%
 fit(Daily_Case ~ Date, training(splits))

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

model_spec_glmnet <- linear_reg(penalty = 0.01, mixture = 0.5) %>% set_engine("glmnet")

workflow_fit_glmnet <- workflow() %>%
 add_model(model_spec_glmnet) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))

model_spec_rf <- rand_forest(trees = 500, min_n = 50) %>%
 set_engine("randomForest")

workflow_fit_rf <- workflow() %>%
 add_model(model_spec_rf) %>%
 add_recipe(recipe_spec %>% step_rm(Date)) %>%
 fit(training(splits))


model_spec_prophet_boost <- prophet_boost() %>%
 set_engine("prophet_xgboost", yearly.seasonality = TRUE)

workflow_fit_prophet_boost <- workflow() %>%
 add_model(model_spec_prophet_boost) %>%
 add_recipe(recipe_spec) %>%
 fit(training(splits))

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

model_table <- modeltime_table(
 model_fit_arima,
 model_fit_prophet,
 workflow_fit_glmnet,
 workflow_fit_rf,
 workflow_fit_prophet_boost
)

Evaluation

calibration_table <- model_table %>%
 modeltime_calibrate(testing(splits))

calibration_table %>%
 modeltime_accuracy() %>%
 table_modeltime_accuracy(.interactive = FALSE)

Accuracy Table

.model_id	.model_desc	.type	mae	mape	mase	smape	rmse	rsq
1	ARIMA(1,1,0)(0,1,1)[7]	Test	5567.94	14.46	0.83	14.71	7221.18	0.17
2	PROPHET	Test	23099.14	62.04	3.45	45.90	23763.13	0.39
3	GLMNET	Test	17968.62	46.96	2.69	35.41	21541.65	0.12
4	RANDOMFOREST	Test	8612.36	24.92	1.29	20.77	10292.23	0.10
5	PROPHET W/ XGBOOST ERRORS	Test	14204.31	37.89	2.12	29.70	17353.53	0.16

Forecasting

calibration_table %>%
 modeltime_forecast(actual_data = time_series_covid19_confirmed_US) %>%
 plot_modeltime_forecast(.interactive = TRUE)

## Using '.calibration_data' to forecast.

## 3 Months Prediction

ret <- calibration_table %>%
 modeltime_refit(time_series_covid19_confirmed_US) %>%
 modeltime_forecast(h = "3 months", actual_data = time_series_covid19_confirmed_US)

## frequency = 7 observations per 1 week

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

## Warning: The following arguments cannot be manually modified and were removed:
## yearly.seasonality.

## Disabling yearly seasonality. Run prophet with yearly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

ret %>%
 plot_modeltime_forecast(.interactive = TRUE, .title = "Compare all models forecast Plot", .y_lab = "New daily cases")

Demo20200930

David Chiu

9/30/2020

下載確診資料

載入確診資料

彙整確診資料

建立訓練與測試資料集

Prophet

Machine Learning Model

ElasticNet

RandomForest

XGBoost

Model Table

Calibration

Forecasting

Split Data

Training Model

Evaluation

Forecasting