Data
getSymbols("^JKSE", src = "yahoo")
## Warning: ^JKSE contains missing values. Some functions will not work if objects
## contain missing values in the middle of the series. Consider using na.omit(),
## na.approx(), na.fill(), etc to remove or replace them.
## [1] "^JKSE"
JKSE <- JKSE[,6]
jkse_tbl <- tk_tbl(JKSE)
ihsg_tbl <- jkse_tbl %>% rename(value = JKSE.Adjusted, date = index)
ihsg_tbl <- ihsg_tbl %>%
mutate(value_log = log1p(value )) %>%
mutate(value_standard = standardize_vec(value_log)) %>%
mutate(value_ret = ROC(value)) %>%
na.omit()
## Standardization Parameters
## mean: 8.359412025005
## standard deviation: 0.421612798080202
mean_transform <- 8.359412025005
sd_transform <- 0.421612798080202
ihsg_tbl %>% glimpse()
## Rows: 3,910
## Columns: 5
## $ date <date> 2007-01-03, 2007-01-04, 2007-01-05, 2007-01-08, 2007-0…
## $ value <dbl> 1834.709, 1824.103, 1832.550, 1813.394, 1780.881, 1710.…
## $ value_log <dbl> 7.515186, 7.509392, 7.514009, 7.503507, 7.485425, 7.445…
## $ value_standard <dbl> -2.002373, -2.016116, -2.005164, -2.030074, -2.072962, …
## $ value_ret <dbl> -0.0009866103, -0.0057975029, 0.0046200921, -0.01050821…
ihsg_tbl %>% summary()
## date value value_log value_standard
## Min. :2007-01-03 Min. :1111 Min. :7.014 Min. :-3.190477
## 1st Qu.:2011-01-27 1st Qu.:3597 1st Qu.:8.188 1st Qu.:-0.406108
## Median :2015-01-28 Median :4845 Median :8.486 Median : 0.300154
## Mean :2015-02-11 Mean :4602 Mean :8.358 Mean :-0.002725
## 3rd Qu.:2019-03-19 3rd Qu.:5952 3rd Qu.:8.692 3rd Qu.: 0.788051
## Max. :2023-03-24 Max. :7318 Max. :8.898 Max. : 1.277995
## value_ret
## Min. :-0.1130596
## 1st Qu.:-0.0050624
## Median : 0.0009743
## Mean : 0.0002979
## 3rd Qu.: 0.0064060
## Max. : 0.0970422
Transformation
Log
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = log1p(value))
# Reverse back
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = log1p(value) %>% expm1())
ihsg_tbl %>%
plot_time_series_regression(.date_var = date,
.formula = log1p(value) ~ as.numeric(date)
+ wday(date, label = TRUE)
+ month(date, label = TRUE),.show_summary = TRUE)
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84325 -0.10577 0.03681 0.14417 0.32501
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.773e+00 3.018e-02 158.166 <2e-16 ***
## as.numeric(date) 2.176e-04 1.822e-06 119.426 <2e-16 ***
## wday(date, label = TRUE).L 1.864e-03 7.000e-03 0.266 0.7900
## wday(date, label = TRUE).Q 3.904e-03 6.980e-03 0.559 0.5760
## wday(date, label = TRUE).C -3.448e-03 6.975e-03 -0.494 0.6211
## wday(date, label = TRUE)^4 1.529e-03 6.949e-03 0.220 0.8258
## month(date, label = TRUE).L 2.542e-03 1.076e-02 0.236 0.8132
## month(date, label = TRUE).Q -2.079e-02 1.086e-02 -1.914 0.0556 .
## month(date, label = TRUE).C -3.085e-03 1.082e-02 -0.285 0.7756
## month(date, label = TRUE)^4 1.319e-02 1.074e-02 1.227 0.2198
## month(date, label = TRUE)^5 -5.654e-03 1.083e-02 -0.522 0.6017
## month(date, label = TRUE)^6 5.590e-03 1.080e-02 0.518 0.6046
## month(date, label = TRUE)^7 7.201e-04 1.075e-02 0.067 0.9466
## month(date, label = TRUE)^8 -7.240e-03 1.074e-02 -0.674 0.5004
## month(date, label = TRUE)^9 1.657e-02 1.081e-02 1.533 0.1253
## month(date, label = TRUE)^10 4.412e-03 1.095e-02 0.403 0.6869
## month(date, label = TRUE)^11 -6.550e-03 1.093e-02 -0.599 0.5492
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.195 on 3893 degrees of freedom
## Multiple R-squared: 0.7862, Adjusted R-squared: 0.7853
## F-statistic: 894.5 on 16 and 3893 DF, p-value: < 2.2e-16
Box Cox
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = box_cox_vec(value+1, lambda = "auto"))
## box_cox_vec(): Using value for lambda: 0.728853335050395
# lambda 0.731279162719333
ihsg_tbl %>%
plot_time_series(.date_var = date,
.value = box_cox_vec(value+1, lambda = "auto") %>% box_cox_inv_vec(lambda = 0.731279162719333))
## box_cox_vec(): Using value for lambda: 0.728853335050395
ihsg_tbl %>%
plot_time_series_regression(.date_var = date,
.formula = box_cox_vec(value+1, lambda = "auto") ~ as.numeric(date)
+ wday(date, label = TRUE)
+ month(date, label = TRUE),.show_summary = TRUE)
## box_cox_vec(): Using value for lambda: 0.728853335050395
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -226.10 -32.10 11.58 45.22 124.82
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.537e+02 9.603e+00 -88.895 <2e-16 ***
## as.numeric(date) 9.014e-02 5.798e-04 155.481 <2e-16 ***
## wday(date, label = TRUE).L 1.136e+00 2.227e+00 0.510 0.6101
## wday(date, label = TRUE).Q 1.090e+00 2.221e+00 0.491 0.6237
## wday(date, label = TRUE).C -9.399e-01 2.220e+00 -0.423 0.6720
## wday(date, label = TRUE)^4 4.819e-01 2.211e+00 0.218 0.8275
## month(date, label = TRUE).L -5.206e+00 3.424e+00 -1.521 0.1284
## month(date, label = TRUE).Q 1.067e+00 3.456e+00 0.309 0.7575
## month(date, label = TRUE).C 4.920e-01 3.444e+00 0.143 0.8864
## month(date, label = TRUE)^4 9.282e-01 3.419e+00 0.272 0.7860
## month(date, label = TRUE)^5 -3.749e-01 3.446e+00 -0.109 0.9134
## month(date, label = TRUE)^6 1.276e+00 3.436e+00 0.371 0.7105
## month(date, label = TRUE)^7 -1.670e+00 3.422e+00 -0.488 0.6255
## month(date, label = TRUE)^8 -2.831e+00 3.418e+00 -0.828 0.4076
## month(date, label = TRUE)^9 7.062e+00 3.440e+00 2.053 0.0402 *
## month(date, label = TRUE)^10 2.079e+00 3.483e+00 0.597 0.5507
## month(date, label = TRUE)^11 -1.780e+00 3.479e+00 -0.512 0.6090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 62.06 on 3893 degrees of freedom
## Multiple R-squared: 0.8618, Adjusted R-squared: 0.8612
## F-statistic: 1517 on 16 and 3893 DF, p-value: < 2.2e-16
## box_cox_vec(): Using value for lambda: 0.728853335050395
Normalise
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = normalize_vec(value))
## Normalization Parameters
## min: 1111.390015
## max: 7318.016113
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = normalize_vec(value) %>% normalize_inv_vec(min = 1111.390015, max = 7318.016113))
## Normalization Parameters
## min: 1111.390015
## max: 7318.016113
Standardise
ihsg_tbl %>%
plot_time_series(.date_var = date,
.value = standardize_vec(value))
## Standardization Parameters
## mean: 4601.96618569361
## standard deviation: 1577.95422264873
ihsg_tbl %>%
plot_time_series(.date_var = date,
.value = standardize_vec(value) %>% standardize_inv_vec(mean = 4604.47526640228, sd = 1579.05165021775))
## Standardization Parameters
## mean: 4601.96618569361
## standard deviation: 1577.95422264873
Future Data
ihsg_full_tbl <- ihsg_tbl %>%
bind_rows(future_frame(.date_var = date, .data = ., .length_out = "1 year"))
ihsg_future_tbl <- ihsg_full_tbl %>% filter(is.na(value))
ihsg_full_tbl %>% summary()
## date value value_log value_standard
## Min. :2007-01-03 Min. :1111 Min. :7.014 Min. :-3.1905
## 1st Qu.:2011-06-12 1st Qu.:3597 1st Qu.:8.188 1st Qu.:-0.4061
## Median :2015-10-28 Median :4845 Median :8.486 Median : 0.3002
## Mean :2015-11-07 Mean :4602 Mean :8.358 Mean :-0.0027
## 3rd Qu.:2020-05-11 3rd Qu.:5952 3rd Qu.:8.692 3rd Qu.: 0.7881
## Max. :2024-03-24 Max. :7318 Max. :8.898 Max. : 1.2780
## NA's :366 NA's :366 NA's :366
## value_ret
## Min. :-0.1131
## 1st Qu.:-0.0051
## Median : 0.0010
## Mean : 0.0003
## 3rd Qu.: 0.0064
## Max. : 0.0970
## NA's :366
ihsg_future_tbl %>% summary()
## date value value_log value_standard value_ret
## Min. :2023-03-25 Min. : NA Min. : NA Min. : NA Min. : NA
## 1st Qu.:2023-06-24 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
## Median :2023-09-23 Median : NA Median : NA Median : NA Median : NA
## Mean :2023-09-23 Mean :NaN Mean :NaN Mean :NaN Mean :NaN
## 3rd Qu.:2023-12-23 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
## Max. :2024-03-24 Max. : NA Max. : NA Max. : NA Max. : NA
## NA's :366 NA's :366 NA's :366 NA's :366
Plot
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = value, .smooth = F)
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = value_log, .smooth = F)
ihsg_tbl %>%
plot_time_series(.date_var = date, .value = value_standard, .smooth = F)
ihsg_tbl %>%
plot_time_series_regression(.date_var = date,
.formula = value ~ as.numeric(date)
+ wday(date, label = TRUE)
+ month(date, label = TRUE),.show_summary = TRUE)
##
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2264.4 -288.3 102.9 386.7 1153.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.596e+03 8.553e+01 -112.192 <2e-16 ***
## as.numeric(date) 8.617e-01 5.164e-03 166.872 <2e-16 ***
## wday(date, label = TRUE).L 1.184e+01 1.984e+01 0.597 0.5507
## wday(date, label = TRUE).Q 8.867e+00 1.978e+01 0.448 0.6540
## wday(date, label = TRUE).C -7.746e+00 1.977e+01 -0.392 0.6952
## wday(date, label = TRUE)^4 4.069e+00 1.970e+01 0.207 0.8363
## month(date, label = TRUE).L -7.061e+01 3.049e+01 -2.316 0.0206 *
## month(date, label = TRUE).Q 3.716e+01 3.078e+01 1.207 0.2275
## month(date, label = TRUE).C 8.641e+00 3.067e+01 0.282 0.7782
## month(date, label = TRUE)^4 -2.960e+00 3.045e+01 -0.097 0.9226
## month(date, label = TRUE)^5 4.109e+00 3.069e+01 0.134 0.8935
## month(date, label = TRUE)^6 9.415e+00 3.060e+01 0.308 0.7584
## month(date, label = TRUE)^7 -2.240e+01 3.048e+01 -0.735 0.4625
## month(date, label = TRUE)^8 -2.720e+01 3.044e+01 -0.894 0.3716
## month(date, label = TRUE)^9 6.762e+01 3.064e+01 2.207 0.0274 *
## month(date, label = TRUE)^10 1.990e+01 3.102e+01 0.642 0.5212
## month(date, label = TRUE)^11 -1.298e+01 3.099e+01 -0.419 0.6754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 552.8 on 3893 degrees of freedom
## Multiple R-squared: 0.8778, Adjusted R-squared: 0.8773
## F-statistic: 1748 on 16 and 3893 DF, p-value: < 2.2e-16
Splits
splits <- time_series_split(ihsg_tbl, assess = "3 year", cumulative = TRUE)
## Using date_var: date
# splits <- initial_time_split(ihsg_tbl, prop = 0.9)
train_data <- training(splits)
test_data <- testing(splits)
train_data %>% summary()
## date value value_log value_standard
## Min. :2007-01-03 Min. :1111 Min. :7.014 Min. :-3.19048
## 1st Qu.:2010-05-06 1st Qu.:2834 1st Qu.:7.950 1st Qu.:-0.97139
## Median :2013-08-01 Median :4439 Median :8.398 Median : 0.09251
## Mean :2013-08-16 Mean :4227 Mean :8.273 Mean :-0.20387
## 3rd Qu.:2016-11-21 3rd Qu.:5373 3rd Qu.:8.589 3rd Qu.: 0.54521
## Max. :2020-04-23 Max. :6681 Max. :8.807 Max. : 1.06188
## value_ret
## Min. :-0.1130596
## 1st Qu.:-0.0052096
## Median : 0.0010271
## Mean : 0.0002434
## 3rd Qu.: 0.0065355
## Max. : 0.0970422
test_data %>% summary()
## date value value_log value_standard
## Min. :2020-04-24 Min. :4496 Min. :8.411 Min. :0.1228
## 1st Qu.:2021-01-25 1st Qu.:5979 1st Qu.:8.696 1st Qu.:0.7988
## Median :2021-10-15 Median :6524 Median :8.783 Median :1.0057
## Mean :2021-10-14 Mean :6283 Mean :8.739 Mean :0.8992
## 3rd Qu.:2022-07-13 3rd Qu.:6873 3rd Qu.:8.836 3rd Qu.:1.1293
## Max. :2023-03-24 Max. :7318 Max. :8.898 Max. :1.2780
## value_ret
## Min. :-0.0513845
## 1st Qu.:-0.0047548
## Median : 0.0006952
## Mean : 0.0005424
## 3rd Qu.: 0.0060591
## Max. : 0.0347124
splits %>%
tk_time_series_cv_plan() %>%
plot_time_series_cv_plan(date, value)
Deeper Checking
Seasonality
ihsg_tbl %>%
plot_acf_diagnostics(.date_var = date,.value = value, .lags = 60)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
ihsg_tbl %>%
plot_acf_diagnostics(.date_var = date,.value = diff_vec(value), .lags = 60)
## diff_vec(): Initial values: 1834.708984
# Lag 26 and Lag 35
# ihsg_tbl %>%
# plot_acf_diagnostics(.date_var = date,.value = log(diff_vec(value)+1), .lags = 60)
#
# ihsg_tbl %>%
# plot_acf_diagnostics(.date_var = date,.value = log(value+1), .lags = 60)
Model
Auto ARIMA
model_fit_arima <- arima_reg() %>%
set_engine(engine = "auto_arima") %>%
fit(value ~ date, data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima
## parsnip model object
##
## Series: outcome
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.0653
## s.e. 0.0179
##
## sigma^2 = 2312: log likelihood = -16912.17
## AIC=33828.35 AICc=33828.35 BIC=33840.49
Auto ARIMA complete
model_fit_arima_complete <- arima_reg() %>%
set_engine(engine = "auto_arima") %>%
fit(value ~ date
+ fourier_vec(date, period = 5)
+ fourier_vec(date, period = 20)
+ fourier_vec(date, period = 60)
+ fourier_vec(date, period = 200)
+ month(date, label = TRUE), data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima_complete
## parsnip model object
##
## Series: outcome
## Regression with ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 fourier_vec_date_period_5 fourier_vec_date_period_20
## 0.0679 -0.1968 3.8482
## s.e. 0.0180 0.8704 2.2740
## fourier_vec_date_period_60 fourier_vec_date_period_200
## 5.7809 25.8773
## s.e. 6.3815 21.4447
## month_date_label_true_Feb month_date_label_true_Mar
## -2.6177 -12.8883
## s.e. 12.2569 16.5906
## month_date_label_true_Apr month_date_label_true_May
## -9.0040 -21.8731
## s.e. 19.3414 21.2512
## month_date_label_true_Jun month_date_label_true_Jul
## -3.3499 -7.3274
## s.e. 22.3413 22.7719
## month_date_label_true_Aug month_date_label_true_Sep
## 0.3404 -19.6084
## s.e. 22.5111 21.5560
## month_date_label_true_Oct month_date_label_true_Nov
## -39.0250 -44.5712
## s.e. 19.8292 17.0815
## month_date_label_true_Dec
## 2.5900
## s.e. 12.6931
##
## sigma^2 = 2304: log likelihood = -16899.03
## AIC=33832.05 AICc=33832.24 BIC=33935.23
Auto ARIMA ACF
model_fit_arima_complete_acf <- arima_reg() %>%
set_engine(engine = "auto_arima") %>%
fit(value ~ date
+ fourier_vec(date, period = 26)
+ fourier_vec(date, period = 35)
+ fourier_vec(date, period = 54)
+ month(date, label = TRUE), data = train_data)
## frequency = 5 observations per 1 week
model_fit_arima_complete_acf
## parsnip model object
##
## Series: outcome
## Regression with ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 fourier_vec_date_period_26 fourier_vec_date_period_35
## 0.0678 1.2942 -3.9924
## s.e. 0.0180 2.9349 3.8213
## fourier_vec_date_period_54 month_date_label_true_Feb
## -2.7100 -2.7794
## s.e. 5.8796 12.2619
## month_date_label_true_Mar month_date_label_true_Apr
## -13.7400 -8.7465
## s.e. 16.5832 19.3660
## month_date_label_true_May month_date_label_true_Jun
## -20.96 -2.6745
## s.e. 21.29 22.3778
## month_date_label_true_Jul month_date_label_true_Aug
## -7.7051 0.1917
## s.e. 22.7601 22.5058
## month_date_label_true_Sep month_date_label_true_Oct
## -18.8362 -37.5343
## s.e. 21.5705 19.8634
## month_date_label_true_Nov month_date_label_true_Dec
## -42.9790 3.9484
## s.e. 17.1492 12.7417
##
## sigma^2 = 2306: log likelihood = -16900.83
## AIC=33833.65 AICc=33833.82 BIC=33930.77
ARIMA Model Table
model_arima_tbl <- modeltime_table(model_fit_arima,
model_fit_arima_complete,
model_fit_arima_complete_acf) %>%
update_model_description(1, "Auto ARIMA") %>%
update_model_description(2, "ARIMA with logic courier") %>%
update_model_description(3, "ARIMA with ACF Season")
model_arima_tbl
## # Modeltime Table
## # A tibble: 3 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> Auto ARIMA
## 2 2 <fit[+]> ARIMA with logic courier
## 3 3 <fit[+]> ARIMA with ACF Season
Prophet
model_fit_prophet <- prophet_reg() %>%
set_engine("prophet") %>%
fit(value ~ date, data = train_data)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
model_fit_prophet
## parsnip model object
##
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'auto'
## - weekly.seasonality: 'auto'
## - daily.seasonality: 'auto'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0
Prophet Weekly
model_fit_prophet_wk <- prophet_reg(seasonality_yearly = F,
seasonality_weekly = T,
seasonality_daily = F) %>%
set_engine("prophet") %>%
fit(value ~ date, data = train_data)
model_fit_prophet_wk
## parsnip model object
##
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'FALSE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'FALSE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0
Prophet Yearly and Weekly
model_fit_prophet_wk_yr <- prophet_reg(seasonality_yearly = T,
seasonality_weekly = T,
seasonality_daily = F) %>%
set_engine("prophet") %>%
fit(value ~ date, data = train_data)
model_fit_prophet_wk_yr
## parsnip model object
##
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'FALSE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0
Prophet complete
model_fit_prophet_complete <- prophet_reg(seasonality_yearly = TRUE,
seasonality_weekly = TRUE,
seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(value ~ date, data = train_data)
model_fit_prophet_complete
## parsnip model object
##
## PROPHET Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 0
Prophet complete w/ fourier
model_fit_prophet_complete_fourier <- prophet_reg(seasonality_yearly = TRUE,
seasonality_weekly = TRUE,
seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(value ~ date
+ fourier_vec(value, period = 5)
+ fourier_vec(value, period = 20)
+ fourier_vec(value, period = 60)
+ fourier_vec(value, period = 200), data = train_data)
model_fit_prophet_complete_fourier
## parsnip model object
##
## PROPHET w/ Regressors Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 4
Prophet complete w/ fourier ACF
model_fit_prophet_complete_fourier_acf <- prophet_reg(seasonality_yearly = TRUE,
seasonality_weekly = TRUE,
seasonality_daily = TRUE) %>%
set_engine("prophet") %>%
fit(value ~ date
+ fourier_vec(value, period = 26)
+ fourier_vec(value, period = 35)
+ fourier_vec(value, period = 54), data = train_data)
model_fit_prophet_complete_fourier_acf
## parsnip model object
##
## PROPHET w/ Regressors Model
## - growth: 'linear'
## - n.changepoints: 25
## - changepoint.range: 0.8
## - yearly.seasonality: 'TRUE'
## - weekly.seasonality: 'TRUE'
## - daily.seasonality: 'TRUE'
## - seasonality.mode: 'additive'
## - changepoint.prior.scale: 0.05
## - seasonality.prior.scale: 10
## - holidays.prior.scale: 10
## - logistic_cap: NULL
## - logistic_floor: NULL
## - extra_regressors: 3
Prophet Model Table
model_prophet_tbl <- modeltime_table(model_fit_prophet,
model_fit_prophet_wk,
model_fit_prophet_wk_yr,
model_fit_prophet_complete,
model_fit_prophet_complete_fourier,
model_fit_prophet_complete_fourier_acf) %>%
update_model_description(1, "Prophet Basic") %>%
update_model_description(2, "Prophet Weekly Seasonality") %>%
update_model_description(3, "Prophet Weekly and Yearly Seasonality") %>%
update_model_description(4, "Prophet All Seasonality") %>%
update_model_description(5, "Prophet All Season and Logic Fourier") %>%
update_model_description(6, "Prophet All Season and ACF Fourier")
model_prophet_tbl
## # Modeltime Table
## # A tibble: 6 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> Prophet Basic
## 2 2 <fit[+]> Prophet Weekly Seasonality
## 3 3 <fit[+]> Prophet Weekly and Yearly Seasonality
## 4 4 <fit[+]> Prophet All Seasonality
## 5 5 <fit[+]> Prophet All Season and Logic Fourier
## 6 6 <fit[+]> Prophet All Season and ACF Fourier
TBATS
model_fit_tbats <- seasonal_reg(seasonal_period_1 = 5,
seasonal_period_2 = 20,
seasonal_period_3 = 60) %>%
set_engine("tbats") %>%
fit(value ~ date, data = train_data)
model_fit_tbats
## parsnip model object
##
## BATS(0.729, {0,0}, -, -)
##
## Call: forecast::tbats(y = outcome, use.parallel = use.parallel)
##
## Parameters
## Lambda: 0.728702
## Alpha: 1.066522
##
## Seed States:
## [,1]
## [1,] 319.083
## attr(,"lambda")
## [1] 0.7287018
##
## Sigma: 5.040043
## AIC: 50498.76
TBATS ACF
model_fit_tbats_acf <- seasonal_reg(seasonal_period_1 = 26,
seasonal_period_2 = 35,
seasonal_period_3 = 54) %>%
set_engine("tbats") %>%
fit(value ~ date, data = train_data)
model_fit_tbats_acf
## parsnip model object
##
## BATS(0.729, {0,0}, -, -)
##
## Call: forecast::tbats(y = outcome, use.parallel = use.parallel)
##
## Parameters
## Lambda: 0.728702
## Alpha: 1.066522
##
## Seed States:
## [,1]
## [1,] 319.083
## attr(,"lambda")
## [1] 0.7287018
##
## Sigma: 5.040043
## AIC: 50498.76
TBATS Model Table
model_tbats_tbl <- modeltime_table(model_fit_tbats,
model_fit_tbats_acf) %>%
update_model_description(1, "TBATS Logic Courier") %>%
update_model_description(2, "TBATS ACF Courier")
model_tbats_tbl
## # Modeltime Table
## # A tibble: 2 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> TBATS Logic Courier
## 2 2 <fit[+]> TBATS ACF Courier
Model Table
# ?modeltime_table()
# ?combine_modeltime_tables()
model_ihsg_tbl <- combine_modeltime_tables(model_arima_tbl,
model_prophet_tbl,
model_tbats_tbl)
model_ihsg_tbl
## # Modeltime Table
## # A tibble: 11 × 3
## .model_id .model .model_desc
## <int> <list> <chr>
## 1 1 <fit[+]> Auto ARIMA
## 2 2 <fit[+]> ARIMA with logic courier
## 3 3 <fit[+]> ARIMA with ACF Season
## 4 4 <fit[+]> Prophet Basic
## 5 5 <fit[+]> Prophet Weekly Seasonality
## 6 6 <fit[+]> Prophet Weekly and Yearly Seasonality
## 7 7 <fit[+]> Prophet All Seasonality
## 8 8 <fit[+]> Prophet All Season and Logic Fourier
## 9 9 <fit[+]> Prophet All Season and ACF Fourier
## 10 10 <fit[+]> TBATS Logic Courier
## 11 11 <fit[+]> TBATS ACF Courier
Calibration
calibration_ihsg_tbl <- model_ihsg_tbl %>%
modeltime_calibrate(new_data = test_data)
calibration_ihsg_tbl
## # Modeltime Table
## # A tibble: 11 × 5
## .model_id .model .model_desc .type .calibration…¹
## <int> <list> <chr> <chr> <list>
## 1 1 <fit[+]> Auto ARIMA Test <tibble>
## 2 2 <fit[+]> ARIMA with logic courier Test <tibble>
## 3 3 <fit[+]> ARIMA with ACF Season Test <tibble>
## 4 4 <fit[+]> Prophet Basic Test <tibble>
## 5 5 <fit[+]> Prophet Weekly Seasonality Test <tibble>
## 6 6 <fit[+]> Prophet Weekly and Yearly Seasonality Test <tibble>
## 7 7 <fit[+]> Prophet All Seasonality Test <tibble>
## 8 8 <fit[+]> Prophet All Season and Logic Fourier Test <tibble>
## 9 9 <fit[+]> Prophet All Season and ACF Fourier Test <tibble>
## 10 10 <fit[+]> TBATS Logic Courier Test <tibble>
## 11 11 <fit[+]> TBATS ACF Courier Test <tibble>
## # … with abbreviated variable name ¹​.calibration_data
Accuracy
calibration_ihsg_tbl %>%
modeltime_accuracy() %>% glimpse()
## Warning: A correlation computation is required, but `estimate` is constant
## and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be
## returned.
## Warning: A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## A correlation computation is required, but `estimate` is constant and has 0 standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## Rows: 11
## Columns: 9
## $ .model_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
## $ .model_desc <chr> "Auto ARIMA", "ARIMA with logic courier", "ARIMA with ACF …
## $ .type <chr> "Test", "Test", "Test", "Test", "Test", "Test", "Test", "T…
## $ mae <dbl> 1690.3058, 1677.7054, 1690.1833, 721.2147, 721.0091, 721.2…
## $ mape <dbl> 25.79479, 25.59434, 25.79463, 11.57737, 11.56923, 11.57737…
## $ mase <dbl> 39.87479, 39.57754, 39.87190, 17.01366, 17.00881, 17.01366…
## $ smape <dbl> 30.27130, 30.00656, 30.26931, 11.78186, 11.77799, 11.78186…
## $ rmse <dbl> 1840.1592, 1828.2789, 1839.8238, 833.3923, 829.8259, 833.3…
## $ rsq <dbl> NA, 0.002485351, 0.003682248, 0.278112443, 0.783344417, 0.…
Forecast
calibration_ihsg_tbl %>%
modeltime_forecast(new_data = test_data, actual_data = ihsg_tbl) %>% glimpse()
## Rows: 11,753
## Columns: 7
## $ .model_id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ .model_desc <chr> "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL", "ACTUAL"…
## $ .key <fct> actual, actual, actual, actual, actual, actual, actual, ac…
## $ .index <date> 2007-01-03, 2007-01-04, 2007-01-05, 2007-01-08, 2007-01-0…
## $ .value <dbl> 1834.709, 1824.103, 1832.550, 1813.394, 1780.881, 1710.367…
## $ .conf_lo <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ .conf_hi <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
Plot Forecast
calibration_ihsg_tbl %>%
modeltime_forecast(new_data = test_data, actual_data = ihsg_tbl) %>%
plot_modeltime_forecast(.conf_interval_show = FALSE)
Refit
refit_ihsg_tbl <- calibration_ihsg_tbl %>%
modeltime_refit(data = ihsg_tbl)
## frequency = 5 observations per 1 week
## frequency = 5 observations per 1 week
## frequency = 5 observations per 1 week
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
Plot Refit
refit_ihsg_tbl %>%
modeltime_forecast(new_data = ihsg_future_tbl,
actual_data = ihsg_full_tbl) %>%
plot_modeltime_forecast(.conf_interval_show = F)
## Error: Problem occurred during prediction. Error in setup_dataframe(object, df): Found NaN in column fourier_vec_value_period_5
## Error: Problem occurred during prediction. Error in setup_dataframe(object, df): Found NaN in column fourier_vec_value_period_26
Library Prophet
library(prophet)
## Loading required package: Rcpp
##
## Attaching package: 'Rcpp'
## The following object is masked from 'package:rsample':
##
## populate
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, splice
ihsg_df <- ihsg_tbl %>%
select(date, value) %>%
rename(ds = date, y = value) %>%
as.data.frame()
ihsg_prophet <- prophet(df = ihsg_df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future_df <- make_future_dataframe(ihsg_prophet, periods = 100)
forecast_df <- predict(ihsg_prophet, future_df)
plot(ihsg_prophet, forecast_df)
