Data

getSymbols("^JKSE", src = "yahoo")
## Warning: ^JKSE contains missing values. Some functions will not work if objects
## contain missing values in the middle of the series. Consider using na.omit(),
## na.approx(), na.fill(), etc to remove or replace them.
## [1] "^JKSE"
JKSE <- JKSE[,6]

jkse_tbl <- tk_tbl(JKSE)

ihsg_tbl <- jkse_tbl %>% rename(value = JKSE.Adjusted, date = index)

ihsg_tbl <- ihsg_tbl %>% 
  mutate(value_log = log1p(value )) %>% 
  mutate(value_standard = standardize_vec(value_log)) %>% 
  mutate(value_ret = ROC(value)) %>% 
  na.omit()
## Standardization Parameters
## mean: 8.359412025005
## standard deviation: 0.421612798080202
mean_transform <- 8.359412025005
sd_transform <- 0.421612798080202

ihsg_tbl %>% glimpse()
## Rows: 3,910
## Columns: 5
## $ date           <date> 2007-01-03, 2007-01-04, 2007-01-05, 2007-01-08, 2007-0…
## $ value          <dbl> 1834.709, 1824.103, 1832.550, 1813.394, 1780.881, 1710.…
## $ value_log      <dbl> 7.515186, 7.509392, 7.514009, 7.503507, 7.485425, 7.445…
## $ value_standard <dbl> -2.002373, -2.016116, -2.005164, -2.030074, -2.072962, …
## $ value_ret      <dbl> -0.0009866103, -0.0057975029, 0.0046200921, -0.01050821…
ihsg_tbl %>% summary()
##       date                value        value_log     value_standard     
##  Min.   :2007-01-03   Min.   :1111   Min.   :7.014   Min.   :-3.190477  
##  1st Qu.:2011-01-27   1st Qu.:3597   1st Qu.:8.188   1st Qu.:-0.406108  
##  Median :2015-01-28   Median :4845   Median :8.486   Median : 0.300154  
##  Mean   :2015-02-11   Mean   :4602   Mean   :8.358   Mean   :-0.002725  
##  3rd Qu.:2019-03-19   3rd Qu.:5952   3rd Qu.:8.692   3rd Qu.: 0.788051  
##  Max.   :2023-03-24   Max.   :7318   Max.   :8.898   Max.   : 1.277995  
##    value_ret         
##  Min.   :-0.1130596  
##  1st Qu.:-0.0050624  
##  Median : 0.0009743  
##  Mean   : 0.0002979  
##  3rd Qu.: 0.0064060  
##  Max.   : 0.0970422

Transformation

Log

ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = log1p(value))
# Reverse back
ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = log1p(value) %>% expm1())
ihsg_tbl %>% 
  plot_time_series_regression(.date_var = date, 
                              .formula = log1p(value) ~ as.numeric(date) 
                              + wday(date, label = TRUE) 
                              + month(date, label = TRUE),.show_summary = TRUE)
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84325 -0.10577  0.03681  0.14417  0.32501 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   4.773e+00  3.018e-02 158.166   <2e-16 ***
## as.numeric(date)              2.176e-04  1.822e-06 119.426   <2e-16 ***
## wday(date, label = TRUE).L    1.864e-03  7.000e-03   0.266   0.7900    
## wday(date, label = TRUE).Q    3.904e-03  6.980e-03   0.559   0.5760    
## wday(date, label = TRUE).C   -3.448e-03  6.975e-03  -0.494   0.6211    
## wday(date, label = TRUE)^4    1.529e-03  6.949e-03   0.220   0.8258    
## month(date, label = TRUE).L   2.542e-03  1.076e-02   0.236   0.8132    
## month(date, label = TRUE).Q  -2.079e-02  1.086e-02  -1.914   0.0556 .  
## month(date, label = TRUE).C  -3.085e-03  1.082e-02  -0.285   0.7756    
## month(date, label = TRUE)^4   1.319e-02  1.074e-02   1.227   0.2198    
## month(date, label = TRUE)^5  -5.654e-03  1.083e-02  -0.522   0.6017    
## month(date, label = TRUE)^6   5.590e-03  1.080e-02   0.518   0.6046    
## month(date, label = TRUE)^7   7.201e-04  1.075e-02   0.067   0.9466    
## month(date, label = TRUE)^8  -7.240e-03  1.074e-02  -0.674   0.5004    
## month(date, label = TRUE)^9   1.657e-02  1.081e-02   1.533   0.1253    
## month(date, label = TRUE)^10  4.412e-03  1.095e-02   0.403   0.6869    
## month(date, label = TRUE)^11 -6.550e-03  1.093e-02  -0.599   0.5492    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.195 on 3893 degrees of freedom
## Multiple R-squared:  0.7862, Adjusted R-squared:  0.7853 
## F-statistic: 894.5 on 16 and 3893 DF,  p-value: < 2.2e-16

Box Cox

ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = box_cox_vec(value+1, lambda = "auto")) 
## box_cox_vec(): Using value for lambda: 0.728853335050395
# lambda 0.731279162719333

ihsg_tbl %>% 
  plot_time_series(.date_var = date, 
                   .value = box_cox_vec(value+1, lambda = "auto") %>% box_cox_inv_vec(lambda = 0.731279162719333))
## box_cox_vec(): Using value for lambda: 0.728853335050395
ihsg_tbl %>% 
  plot_time_series_regression(.date_var = date, 
                              .formula = box_cox_vec(value+1, lambda = "auto") ~ as.numeric(date) 
                              + wday(date, label = TRUE) 
                              + month(date, label = TRUE),.show_summary = TRUE)
## box_cox_vec(): Using value for lambda: 0.728853335050395
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -226.10  -32.10   11.58   45.22  124.82 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -8.537e+02  9.603e+00 -88.895   <2e-16 ***
## as.numeric(date)              9.014e-02  5.798e-04 155.481   <2e-16 ***
## wday(date, label = TRUE).L    1.136e+00  2.227e+00   0.510   0.6101    
## wday(date, label = TRUE).Q    1.090e+00  2.221e+00   0.491   0.6237    
## wday(date, label = TRUE).C   -9.399e-01  2.220e+00  -0.423   0.6720    
## wday(date, label = TRUE)^4    4.819e-01  2.211e+00   0.218   0.8275    
## month(date, label = TRUE).L  -5.206e+00  3.424e+00  -1.521   0.1284    
## month(date, label = TRUE).Q   1.067e+00  3.456e+00   0.309   0.7575    
## month(date, label = TRUE).C   4.920e-01  3.444e+00   0.143   0.8864    
## month(date, label = TRUE)^4   9.282e-01  3.419e+00   0.272   0.7860    
## month(date, label = TRUE)^5  -3.749e-01  3.446e+00  -0.109   0.9134    
## month(date, label = TRUE)^6   1.276e+00  3.436e+00   0.371   0.7105    
## month(date, label = TRUE)^7  -1.670e+00  3.422e+00  -0.488   0.6255    
## month(date, label = TRUE)^8  -2.831e+00  3.418e+00  -0.828   0.4076    
## month(date, label = TRUE)^9   7.062e+00  3.440e+00   2.053   0.0402 *  
## month(date, label = TRUE)^10  2.079e+00  3.483e+00   0.597   0.5507    
## month(date, label = TRUE)^11 -1.780e+00  3.479e+00  -0.512   0.6090    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62.06 on 3893 degrees of freedom
## Multiple R-squared:  0.8618, Adjusted R-squared:  0.8612 
## F-statistic:  1517 on 16 and 3893 DF,  p-value: < 2.2e-16
## box_cox_vec(): Using value for lambda: 0.728853335050395

Normalise

ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = normalize_vec(value))
## Normalization Parameters
## min: 1111.390015
## max: 7318.016113
ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = normalize_vec(value) %>% normalize_inv_vec(min = 1111.390015, max = 7318.016113))
## Normalization Parameters
## min: 1111.390015
## max: 7318.016113

Standardise

ihsg_tbl %>% 
  plot_time_series(.date_var = date, 
                   .value = standardize_vec(value))
## Standardization Parameters
## mean: 4601.96618569361
## standard deviation: 1577.95422264873
ihsg_tbl %>% 
  plot_time_series(.date_var = date, 
                   .value = standardize_vec(value) %>% standardize_inv_vec(mean = 4604.47526640228, sd = 1579.05165021775))
## Standardization Parameters
## mean: 4601.96618569361
## standard deviation: 1577.95422264873

Future Data

ihsg_full_tbl <- ihsg_tbl %>% 
  bind_rows(future_frame(.date_var = date, .data = ., .length_out = "1 year"))

ihsg_future_tbl <- ihsg_full_tbl %>% filter(is.na(value))

ihsg_full_tbl %>% summary()
##       date                value        value_log     value_standard   
##  Min.   :2007-01-03   Min.   :1111   Min.   :7.014   Min.   :-3.1905  
##  1st Qu.:2011-06-12   1st Qu.:3597   1st Qu.:8.188   1st Qu.:-0.4061  
##  Median :2015-10-28   Median :4845   Median :8.486   Median : 0.3002  
##  Mean   :2015-11-07   Mean   :4602   Mean   :8.358   Mean   :-0.0027  
##  3rd Qu.:2020-05-11   3rd Qu.:5952   3rd Qu.:8.692   3rd Qu.: 0.7881  
##  Max.   :2024-03-24   Max.   :7318   Max.   :8.898   Max.   : 1.2780  
##                       NA's   :366    NA's   :366     NA's   :366      
##    value_ret      
##  Min.   :-0.1131  
##  1st Qu.:-0.0051  
##  Median : 0.0010  
##  Mean   : 0.0003  
##  3rd Qu.: 0.0064  
##  Max.   : 0.0970  
##  NA's   :366
ihsg_future_tbl %>% summary()
##       date                value       value_log   value_standard   value_ret  
##  Min.   :2023-03-25   Min.   : NA   Min.   : NA   Min.   : NA    Min.   : NA  
##  1st Qu.:2023-06-24   1st Qu.: NA   1st Qu.: NA   1st Qu.: NA    1st Qu.: NA  
##  Median :2023-09-23   Median : NA   Median : NA   Median : NA    Median : NA  
##  Mean   :2023-09-23   Mean   :NaN   Mean   :NaN   Mean   :NaN    Mean   :NaN  
##  3rd Qu.:2023-12-23   3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA    3rd Qu.: NA  
##  Max.   :2024-03-24   Max.   : NA   Max.   : NA   Max.   : NA    Max.   : NA  
##                       NA's   :366   NA's   :366   NA's   :366    NA's   :366

Plot

ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = value, .smooth = F)
ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = value_log, .smooth = F)
ihsg_tbl %>% 
  plot_time_series(.date_var = date, .value = value_standard, .smooth = F)
ihsg_tbl %>% 
  plot_time_series_regression(.date_var = date, 
                              .formula = value ~ as.numeric(date) 
                              + wday(date, label = TRUE) 
                              + month(date, label = TRUE),.show_summary = TRUE)
## 
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2264.4  -288.3   102.9   386.7  1153.5 
## 
## Coefficients:
##                                Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                  -9.596e+03  8.553e+01 -112.192   <2e-16 ***
## as.numeric(date)              8.617e-01  5.164e-03  166.872   <2e-16 ***
## wday(date, label = TRUE).L    1.184e+01  1.984e+01    0.597   0.5507    
## wday(date, label = TRUE).Q    8.867e+00  1.978e+01    0.448   0.6540    
## wday(date, label = TRUE).C   -7.746e+00  1.977e+01   -0.392   0.6952    
## wday(date, label = TRUE)^4    4.069e+00  1.970e+01    0.207   0.8363    
## month(date, label = TRUE).L  -7.061e+01  3.049e+01   -2.316   0.0206 *  
## month(date, label = TRUE).Q   3.716e+01  3.078e+01    1.207   0.2275    
## month(date, label = TRUE).C   8.641e+00  3.067e+01    0.282   0.7782    
## month(date, label = TRUE)^4  -2.960e+00  3.045e+01   -0.097   0.9226    
## month(date, label = TRUE)^5   4.109e+00  3.069e+01    0.134   0.8935    
## month(date, label = TRUE)^6   9.415e+00  3.060e+01    0.308   0.7584    
## month(date, label = TRUE)^7  -2.240e+01  3.048e+01   -0.735   0.4625    
## month(date, label = TRUE)^8  -2.720e+01  3.044e+01   -0.894   0.3716    
## month(date, label = TRUE)^9   6.762e+01  3.064e+01    2.207   0.0274 *  
## month(date, label = TRUE)^10  1.990e+01  3.102e+01    0.642   0.5212    
## month(date, label = TRUE)^11 -1.298e+01  3.099e+01   -0.419   0.6754    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 552.8 on 3893 degrees of freedom
## Multiple R-squared:  0.8778, Adjusted R-squared:  0.8773 
## F-statistic:  1748 on 16 and 3893 DF,  p-value: < 2.2e-16

Splits

splits <- time_series_split(ihsg_tbl, assess = "3 year", cumulative = TRUE)
## Using date_var: date
# splits <- initial_time_split(ihsg_tbl, prop = 0.9)

train_data <- training(splits)
test_data <- testing(splits)

train_data %>% summary()
##       date                value        value_log     value_standard    
##  Min.   :2007-01-03   Min.   :1111   Min.   :7.014   Min.   :-3.19048  
##  1st Qu.:2010-05-06   1st Qu.:2834   1st Qu.:7.950   1st Qu.:-0.97139  
##  Median :2013-08-01   Median :4439   Median :8.398   Median : 0.09251  
##  Mean   :2013-08-16   Mean   :4227   Mean   :8.273   Mean   :-0.20387  
##  3rd Qu.:2016-11-21   3rd Qu.:5373   3rd Qu.:8.589   3rd Qu.: 0.54521  
##  Max.   :2020-04-23   Max.   :6681   Max.   :8.807   Max.   : 1.06188  
##    value_ret         
##  Min.   :-0.1130596  
##  1st Qu.:-0.0052096  
##  Median : 0.0010271  
##  Mean   : 0.0002434  
##  3rd Qu.: 0.0065355  
##  Max.   : 0.0970422
test_data %>% summary()
##       date                value        value_log     value_standard  
##  Min.   :2020-04-24   Min.   :4496   Min.   :8.411   Min.   :0.1228  
##  1st Qu.:2021-01-25   1st Qu.:5979   1st Qu.:8.696   1st Qu.:0.7988  
##  Median :2021-10-15   Median :6524   Median :8.783   Median :1.0057  
##  Mean   :2021-10-14   Mean   :6283   Mean   :8.739   Mean   :0.8992  
##  3rd Qu.:2022-07-13   3rd Qu.:6873   3rd Qu.:8.836   3rd Qu.:1.1293  
##  Max.   :2023-03-24   Max.   :7318   Max.   :8.898   Max.   :1.2780  
##    value_ret         
##  Min.   :-0.0513845  
##  1st Qu.:-0.0047548  
##  Median : 0.0006952  
##  Mean   : 0.0005424  
##  3rd Qu.: 0.0060591  
##  Max.   : 0.0347124
splits %>%
    tk_time_series_cv_plan() %>%
    plot_time_series_cv_plan(date, value)

Deeper Checking

Seasonality

ihsg_tbl %>% 
  plot_acf_diagnostics(.date_var = date,.value = value, .lags = 60)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
ihsg_tbl %>%
  plot_acf_diagnostics(.date_var = date,.value = diff_vec(value), .lags = 60)
## diff_vec(): Initial values: 1834.708984
# Lag 26 and Lag 35


# ihsg_tbl %>% 
#   plot_acf_diagnostics(.date_var = date,.value = log(diff_vec(value)+1), .lags = 60)
# 
# ihsg_tbl %>% 
#   plot_acf_diagnostics(.date_var = date,.value = log(value+1), .lags = 60)

Model

Auto ARIMA

model_fit_arima <- arima_reg() %>%
    set_engine(engine = "auto_arima") %>%
    fit(value ~ date, data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima
## parsnip model object
## 
## Series: outcome 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.0653
## s.e.  0.0179
## 
## sigma^2 = 2312:  log likelihood = -16912.17
## AIC=33828.35   AICc=33828.35   BIC=33840.49

Auto ARIMA complete

model_fit_arima_complete <- arima_reg() %>%
    set_engine(engine = "auto_arima") %>%
    fit(value ~ date 
        + fourier_vec(date, period = 5)
        + fourier_vec(date, period = 20)
        + fourier_vec(date, period = 60)
        + fourier_vec(date, period = 200)
        + month(date, label = TRUE), data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima_complete
## parsnip model object
## 
## Series: outcome 
## Regression with ARIMA(0,1,1) errors 
## 
## Coefficients:
##          ma1  fourier_vec_date_period_5  fourier_vec_date_period_20
##       0.0679                    -0.1968                      3.8482
## s.e.  0.0180                     0.8704                      2.2740
##       fourier_vec_date_period_60  fourier_vec_date_period_200
##                           5.7809                      25.8773
## s.e.                      6.3815                      21.4447
##       month_date_label_true_Feb  month_date_label_true_Mar
##                         -2.6177                   -12.8883
## s.e.                    12.2569                    16.5906
##       month_date_label_true_Apr  month_date_label_true_May
##                         -9.0040                   -21.8731
## s.e.                    19.3414                    21.2512
##       month_date_label_true_Jun  month_date_label_true_Jul
##                         -3.3499                    -7.3274
## s.e.                    22.3413                    22.7719
##       month_date_label_true_Aug  month_date_label_true_Sep
##                          0.3404                   -19.6084
## s.e.                    22.5111                    21.5560
##       month_date_label_true_Oct  month_date_label_true_Nov
##                        -39.0250                   -44.5712
## s.e.                    19.8292                    17.0815
##       month_date_label_true_Dec
##                          2.5900
## s.e.                    12.6931
## 
## sigma^2 = 2304:  log likelihood = -16899.03
## AIC=33832.05   AICc=33832.24   BIC=33935.23

Auto ARIMA ACF

model_fit_arima_complete_acf <- arima_reg() %>%
    set_engine(engine = "auto_arima") %>%
    fit(value ~ date 
        + fourier_vec(date, period = 26)
        + fourier_vec(date, period = 35)
        + fourier_vec(date, period = 54)
        + month(date, label = TRUE), data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima_complete_acf
## parsnip model object
## 
## Series: outcome 
## Regression with ARIMA(0,1,1) errors 
## 
## Coefficients:
##          ma1  fourier_vec_date_period_26  fourier_vec_date_period_35
##       0.0678                      1.2942                     -3.9924
## s.e.  0.0180                      2.9349                      3.8213
##       fourier_vec_date_period_54  month_date_label_true_Feb
##                          -2.7100                    -2.7794
## s.e.                      5.8796                    12.2619
##       month_date_label_true_Mar  month_date_label_true_Apr
##                        -13.7400                    -8.7465
## s.e.                    16.5832                    19.3660
##       month_date_label_true_May  month_date_label_true_Jun
##                          -20.96                    -2.6745
## s.e.                      21.29                    22.3778
##       month_date_label_true_Jul  month_date_label_true_Aug
##                         -7.7051                     0.1917
## s.e.                    22.7601                    22.5058
##       month_date_label_true_Sep  month_date_label_true_Oct
##                        -18.8362                   -37.5343
## s.e.                    21.5705                    19.8634
##       month_date_label_true_Nov  month_date_label_true_Dec
##                        -42.9790                     3.9484
## s.e.                    17.1492                    12.7417
## 
## sigma^2 = 2306:  log likelihood = -16900.83
## AIC=33833.65   AICc=33833.82   BIC=33930.77

ARIMA Model Table

model_arima_tbl <- modeltime_table(model_fit_arima, 
                                   model_fit_arima_complete, 
                                   model_fit_arima_complete_acf) %>% 
  update_model_description(1, "Auto ARIMA") %>% 
  update_model_description(2, "ARIMA with logic courier") %>% 
  update_model_description(3, "ARIMA with ACF Season")

model_arima_tbl
## # Modeltime Table
## # A tibble: 3 × 3
##   .model_id .model   .model_desc             
##       <int> <list>   <chr>                   
## 1         1 <fit[+]> Auto ARIMA              
## 2         2 <fit[+]> ARIMA with logic courier
## 3         3 <fit[+]> ARIMA with ACF Season

Prophet

model_fit_prophet <- prophet_reg() %>%
    set_engine("prophet") %>%
    fit(value ~ date, data = train_data)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_fit_prophet
## parsnip model object
## 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'auto'
## - weekly.seasonality: 'auto'
## - daily.seasonality: 'auto'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0

Prophet Weekly

model_fit_prophet_wk <- prophet_reg(seasonality_yearly = F, 
                                 seasonality_weekly = T, 
                                 seasonality_daily = F) %>%
    set_engine("prophet") %>%
    fit(value ~ date, data = train_data)

model_fit_prophet_wk
## parsnip model object
## 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'FALSE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'FALSE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0

Prophet Yearly and Weekly

model_fit_prophet_wk_yr <- prophet_reg(seasonality_yearly = T, 
                                 seasonality_weekly = T, 
                                 seasonality_daily = F) %>%
    set_engine("prophet") %>%
    fit(value ~ date, data = train_data)

model_fit_prophet_wk_yr
## parsnip model object
## 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'FALSE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0

Prophet complete

model_fit_prophet_complete <- prophet_reg(seasonality_yearly = TRUE, 
                                 seasonality_weekly = TRUE, 
                                 seasonality_daily = TRUE) %>% 
  set_engine("prophet") %>%
  fit(value ~ date, data = train_data)

model_fit_prophet_complete
## parsnip model object
## 
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0

Prophet complete w/ fourier

model_fit_prophet_complete_fourier <- prophet_reg(seasonality_yearly = TRUE, 
                                 seasonality_weekly = TRUE, 
                                 seasonality_daily = TRUE) %>% 
  set_engine("prophet") %>%
  fit(value ~ date 
      + fourier_vec(value, period = 5) 
      + fourier_vec(value, period = 20) 
      + fourier_vec(value, period = 60) 
      + fourier_vec(value, period = 200), data = train_data)

model_fit_prophet_complete_fourier
## parsnip model object
## 
## PROPHET w/ Regressors Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 4

Prophet complete w/ fourier ACF

model_fit_prophet_complete_fourier_acf <- prophet_reg(seasonality_yearly = TRUE, 
                                 seasonality_weekly = TRUE, 
                                 seasonality_daily = TRUE) %>% 
  set_engine("prophet") %>%
  fit(value ~ date 
      + fourier_vec(value, period = 26) 
      + fourier_vec(value, period = 35) 
      + fourier_vec(value, period = 54), data = train_data)

model_fit_prophet_complete_fourier_acf
## parsnip model object
## 
## PROPHET w/ Regressors Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 3

Prophet Model Table

model_prophet_tbl <- modeltime_table(model_fit_prophet,
                                     model_fit_prophet_wk,
                                     model_fit_prophet_wk_yr,
                                     model_fit_prophet_complete,
                                     model_fit_prophet_complete_fourier,
                                     model_fit_prophet_complete_fourier_acf) %>% 
                  update_model_description(1, "Prophet Basic") %>% 
                  update_model_description(2, "Prophet Weekly Seasonality") %>% 
                  update_model_description(3, "Prophet Weekly and Yearly Seasonality") %>% 
                  update_model_description(4, "Prophet All Seasonality") %>% 
                  update_model_description(5, "Prophet All Season and Logic Fourier") %>% 
                  update_model_description(6, "Prophet All Season and ACF Fourier")

model_prophet_tbl
## # Modeltime Table
## # A tibble: 6 × 3
##   .model_id .model   .model_desc                          
##       <int> <list>   <chr>                                
## 1         1 <fit[+]> Prophet Basic                        
## 2         2 <fit[+]> Prophet Weekly Seasonality           
## 3         3 <fit[+]> Prophet Weekly and Yearly Seasonality
## 4         4 <fit[+]> Prophet All Seasonality              
## 5         5 <fit[+]> Prophet All Season and Logic Fourier 
## 6         6 <fit[+]> Prophet All Season and ACF Fourier

TBATS

model_fit_tbats <- seasonal_reg(seasonal_period_1 = 5,
             seasonal_period_2 = 20,
             seasonal_period_3 = 60) %>%
  set_engine("tbats") %>%
  fit(value ~ date, data = train_data)

model_fit_tbats
## parsnip model object
## 
## BATS(0.729, {0,0}, -, -)
## 
## Call: forecast::tbats(y = outcome, use.parallel = use.parallel)
## 
## Parameters
##   Lambda: 0.728702
##   Alpha: 1.066522
## 
## Seed States:
##         [,1]
## [1,] 319.083
## attr(,"lambda")
## [1] 0.7287018
## 
## Sigma: 5.040043
## AIC: 50498.76

TBATS ACF

model_fit_tbats_acf <- seasonal_reg(seasonal_period_1 = 26,
             seasonal_period_2 = 35,
             seasonal_period_3 = 54) %>%
  set_engine("tbats") %>%
  fit(value ~ date, data = train_data)

model_fit_tbats_acf
## parsnip model object
## 
## BATS(0.729, {0,0}, -, -)
## 
## Call: forecast::tbats(y = outcome, use.parallel = use.parallel)
## 
## Parameters
##   Lambda: 0.728702
##   Alpha: 1.066522
## 
## Seed States:
##         [,1]
## [1,] 319.083
## attr(,"lambda")
## [1] 0.7287018
## 
## Sigma: 5.040043
## AIC: 50498.76

TBATS Model Table

model_tbats_tbl <- modeltime_table(model_fit_tbats, 
                                   model_fit_tbats_acf) %>% 
                  update_model_description(1, "TBATS Logic Courier") %>% 
                  update_model_description(2, "TBATS ACF Courier")

model_tbats_tbl
## # Modeltime Table
## # A tibble: 2 × 3
##   .model_id .model   .model_desc        
##       <int> <list>   <chr>              
## 1         1 <fit[+]> TBATS Logic Courier
## 2         2 <fit[+]> TBATS ACF Courier

Model Table

# ?modeltime_table()
# ?combine_modeltime_tables()

model_ihsg_tbl <- combine_modeltime_tables(model_arima_tbl, 
                         model_prophet_tbl, 
                         model_tbats_tbl)


model_ihsg_tbl
## # Modeltime Table
## # A tibble: 11 × 3
##    .model_id .model   .model_desc                          
##        <int> <list>   <chr>                                
##  1         1 <fit[+]> Auto ARIMA                           
##  2         2 <fit[+]> ARIMA with logic courier             
##  3         3 <fit[+]> ARIMA with ACF Season                
##  4         4 <fit[+]> Prophet Basic                        
##  5         5 <fit[+]> Prophet Weekly Seasonality           
##  6         6 <fit[+]> Prophet Weekly and Yearly Seasonality
##  7         7 <fit[+]> Prophet All Seasonality              
##  8         8 <fit[+]> Prophet All Season and Logic Fourier 
##  9         9 <fit[+]> Prophet All Season and ACF Fourier   
## 10        10 <fit[+]> TBATS Logic Courier                  
## 11        11 <fit[+]> TBATS ACF Courier

Calibration

calibration_ihsg_tbl <- model_ihsg_tbl %>%
  modeltime_calibrate(new_data = test_data)

calibration_ihsg_tbl
## # Modeltime Table
## # A tibble: 11 × 5
##    .model_id .model   .model_desc                           .type .calibration…¹
##        <int> <list>   <chr>                                 <chr> <list>        
##  1         1 <fit[+]> Auto ARIMA                            Test  <tibble>      
##  2         2 <fit[+]> ARIMA with logic courier              Test  <tibble>      
##  3         3 <fit[+]> ARIMA with ACF Season                 Test  <tibble>      
##  4         4 <fit[+]> Prophet Basic                         Test  <tibble>      
##  5         5 <fit[+]> Prophet Weekly Seasonality            Test  <tibble>      
##  6         6 <fit[+]> Prophet Weekly and Yearly Seasonality Test  <tibble>      
##  7         7 <fit[+]> Prophet All Seasonality               Test  <tibble>      
##  8         8 <fit[+]> Prophet All Season and Logic Fourier  Test  <tibble>      
##  9         9 <fit[+]> Prophet All Season and ACF Fourier    Test  <tibble>      
## 10        10 <fit[+]> TBATS Logic Courier                   Test  <tibble>      
## 11        11 <fit[+]> TBATS ACF Courier                     Test  <tibble>      
## # … with abbreviated variable name ¹​.calibration_data

Accuracy

calibration_ihsg_tbl %>%
  modeltime_accuracy() %>% glimpse()
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## Rows: 11
## Columns: 9
## $ .model_id   <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
## $ .model_desc <chr> "Auto ARIMA", "ARIMA with logic courier", "ARIMA with ACF …
## $ .type       <chr> "Test", "Test", "Test", "Test", "Test", "Test", "Test", "T…
## $ mae         <dbl> 1690.3058, 1677.7054, 1690.1833, 721.2147, 721.0091, 721.2…
## $ mape        <dbl> 25.79479, 25.59434, 25.79463, 11.57737, 11.56923, 11.57737…
## $ mase        <dbl> 39.87479, 39.57754, 39.87190, 17.01366, 17.00881, 17.01366…
## $ smape       <dbl> 30.27130, 30.00656, 30.26931, 11.78186, 11.77799, 11.78186…
## $ rmse        <dbl> 1840.1592, 1828.2789, 1839.8238, 833.3923, 829.8259, 833.3…
## $ rsq         <dbl> NA, 0.002485351, 0.003682248, 0.278112443, 0.783344417, 0.…

Forecast

calibration_ihsg_tbl %>%
  modeltime_forecast(new_data = test_data, actual_data = ihsg_tbl) %>% glimpse()
## Rows: 11,753
## Columns: 7
## $ .model_id   <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ .model_desc <chr> "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL"…
## $ .key        <fct> actual, actual, actual, actual, actual, actual, actual, ac…
## $ .index      <date> 2007-01-03, 2007-01-04, 2007-01-05, 2007-01-08, 2007-01-0…
## $ .value      <dbl> 1834.709, 1824.103, 1832.550, 1813.394, 1780.881, 1710.367…
## $ .conf_lo    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ .conf_hi    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…

Plot Forecast

calibration_ihsg_tbl %>%
  modeltime_forecast(new_data = test_data, actual_data = ihsg_tbl) %>%
  plot_modeltime_forecast(.conf_interval_show = FALSE)

Refit

refit_ihsg_tbl <- calibration_ihsg_tbl %>% 
  modeltime_refit(data = ihsg_tbl)
## frequency = 5 observations per 1 week
## frequency = 5 observations per 1 week
## frequency = 5 observations per 1 week
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.

Plot Refit

refit_ihsg_tbl %>%
    modeltime_forecast(new_data = ihsg_future_tbl,
                       actual_data = ihsg_full_tbl) %>%
    plot_modeltime_forecast(.conf_interval_show = F)
## Error: Problem occurred during prediction. Error in setup_dataframe(object, df): Found NaN in column fourier_vec_value_period_5
## Error: Problem occurred during prediction. Error in setup_dataframe(object, df): Found NaN in column fourier_vec_value_period_26

Library Prophet

library(prophet)
## Loading required package: Rcpp
## 
## Attaching package: 'Rcpp'
## The following object is masked from 'package:rsample':
## 
##     populate
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, splice
ihsg_df <- ihsg_tbl %>% 
  select(date, value) %>% 
  rename(ds = date, y = value) %>% 
  as.data.frame()

ihsg_prophet <- prophet(df = ihsg_df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future_df <- make_future_dataframe(ihsg_prophet, periods = 100)

forecast_df <- predict(ihsg_prophet, future_df)

plot(ihsg_prophet, forecast_df)