Exponential Smoothing Double BPJS
Exponential Smoothing Double
Package
## Loading required package: greybox
## Package "greybox", v2.0.8 loaded.
## This is package "smooth", v4.4.0
##
## Attaching package: 'smooth'
## The following object is masked from 'package:TTR':
##
## lags
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::hm() masks greybox::hm()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::spread() masks greybox::spread()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## ── Attaching packages ──────────────────────────────────────────── fpp2 2.5.1 ──
## ✔ fma 2.5 ✔ expsmooth 2.3
Import Data
Exploratory Data Analysis
BPJS$Waktu <- rownames(BPJS)
rownames(BPJS) <- NULL
BPJS$Waktu <- as.Date(BPJS$Waktu)
colnames(BPJS)[1] <- "Pembayaran"
head(BPJS)## Pembayaran Waktu
## 1 30886897141 2019-01-01
## 2 26668399930 2019-02-01
## 3 24249489447 2019-03-01
## 4 20294330876 2019-04-01
## 5 20973792476 2019-05-01
## 6 20432072980 2019-06-01
## 'data.frame': 60 obs. of 2 variables:
## $ Pembayaran: num 3.09e+10 2.67e+10 2.42e+10 2.03e+10 2.10e+10 ...
## $ Waktu : Date, format: "2019-01-01" "2019-02-01" ...
## Pembayaran Waktu
## 1 30886897141 2019-01-01
## 2 26668399930 2019-02-01
## 3 24249489447 2019-03-01
## 4 20294330876 2019-04-01
## 5 20973792476 2019-05-01
## 6 20432072980 2019-06-01
## 7 27091069620 2019-07-01
## 8 26622161737 2019-08-01
## 9 23308081859 2019-09-01
## 10 29037231218 2019-10-01
## 11 25146786106 2019-11-01
## 12 26433345289 2019-12-01
## 13 28949995697 2020-01-01
## 14 29149853765 2020-02-01
## 15 27596946152 2020-03-01
## 16 16751943745 2020-04-01
## 17 18304214335 2020-05-01
## 18 34148012138 2020-06-01
## 19 38550408900 2020-07-01
## 20 27894931182 2020-08-01
## 21 30747995020 2020-09-01
## 22 24718776688 2020-10-01
## 23 17219932456 2020-11-01
## 24 8670469141 2020-12-01
## 25 6021389948 2021-01-01
## 26 3397573896 2021-02-01
## 27 4593792354 2021-03-01
## 28 8400940015 2021-04-01
## 29 5787310029 2021-05-01
## 30 6415138631 2021-06-01
## 31 2123977287 2021-07-01
## 32 3737131137 2021-08-01
## 33 7967293209 2021-09-01
## 34 10543311575 2021-10-01
## 35 17707198695 2021-11-01
## 36 10647000502 2021-12-01
## 37 6697464155 2022-01-01
## 38 15912486490 2022-02-01
## 39 15999436750 2022-03-01
## 40 13952847980 2022-04-01
## 41 11238150670 2022-05-01
## 42 15279343540 2022-06-01
## 43 12703788880 2022-07-01
## 44 8337575080 2022-08-01
## 45 11393311140 2022-09-01
## 46 27371007255 2022-10-01
## 47 32355702360 2022-11-01
## 48 29349703800 2022-12-01
## 49 26809350900 2023-01-01
## 50 55816909563 2023-02-01
## 51 13419968124 2023-03-01
## 52 8656045611 2023-04-01
## 53 13758874488 2023-05-01
## 54 18147594299 2023-06-01
## 55 14001133647 2023-07-01
## 56 16734196688 2023-08-01
## 57 13625340234 2023-09-01
## 58 16537596436 2023-10-01
## 59 20283793513 2023-11-01
## 60 14485444422 2023-12-01
Preprocessing
Transform Data
## [1] 60
BPJS.ts.month<-ts(BPJS$Pembayaran,start = c(2019,1),frequency = 12)
BPJS.clean <- tsclean(BPJS.ts.month)
DataBPJS <- log(BPJS.clean)
summary(DataBPJS)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21.48 23.13 23.56 23.46 24.01 24.38
Plot Data
autoplot(DataBPJS) +
labs(title = "Pembayaran BPJS Bulanan (2019-2023)",
x = "Tahun", y = "Jumlah Pembayaran (Rp)")Convert Time Series
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Augmented Dickey-Fuller Test
##
## data: tsData
## Dickey-Fuller = -2.0242, Lag order = 3, p-value = 0.565
## alternative hypothesis: stationary
Plot ACF
## , , 1
##
## [,1]
## [1,] 1.000000000
## [2,] 0.806361702
## [3,] 0.600206152
## [4,] 0.476230378
## [5,] 0.402306848
## [6,] 0.379343599
## [7,] 0.305574834
## [8,] 0.234207820
## [9,] 0.148046047
## [10,] 0.036603215
## [11,] -0.002054122
## [12,] -0.028832355
## [13,] -0.071023072
## [14,] -0.112661487
## [15,] -0.159238454
## [16,] -0.208302039
## [17,] -0.264703285
## [18,] -0.292749992
## [19,] -0.293995326
## [20,] -0.286136404
## [21,] -0.248002774
## [22,] -0.287093242
## [23,] -0.368284577
## [24,] -0.353303865
## [25,] -0.289247078
## [26,] -0.205709314
## [27,] -0.123125152
## [28,] -0.084709910
## [29,] -0.073456688
## [30,] -0.100488698
## [31,] -0.133648583
## [32,] -0.084572482
## [33,] -0.056695269
## [34,] -0.043696034
## [35,] -0.020960678
## [36,] 0.002020823
## [37,] 0.013745969
# Deteksi pola musiman (misalnya musiman tahunan = lag 12, 24, 36)
seasonal_lags <- seq(12, 36, 12)
seasonal_pattern <- acf_values[seasonal_lags + 1] # +1 karena lag 0 ikut dihitung
# Cek apakah ada korelasi signifikan di lag musiman
seasonal_pattern## [1] -0.07102307 -0.28924708 0.01374597
Modelling
Splitting Data
## Time-Series [1:48] from 2019 to 2023: 24.2 24 23.9 23.7 23.8 ...
## Time-Series [1:12] from 2023 to 2024: 24 23.8 23.3 22.9 23.3 ...
Model Double
## Holt-Winters exponential smoothing with trend and without seasonal component.
##
## Call:
## HoltWinters(x = train, gamma = FALSE)
##
## Smoothing parameters:
## alpha: 1
## beta : 0.04370196
## gamma: FALSE
##
## Coefficients:
## [,1]
## a 24.10254829
## b 0.01244111
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2023 24.11499 23.57891 24.65107 23.29512 24.93486
## Feb 2023 24.12743 23.35255 24.90231 22.94235 25.31251
## Mar 2023 24.13987 23.17020 25.10954 22.65689 25.62285
## Apr 2023 24.15231 23.00866 25.29596 22.40325 25.90138
## May 2023 24.16475 22.85916 25.47035 22.16802 26.16149
## Jun 2023 24.17719 22.71731 25.63708 21.94449 26.40990
## Jul 2023 24.18964 22.58056 25.79871 21.72876 26.65051
## Aug 2023 24.20208 22.44730 25.95685 21.51838 26.88578
## Sep 2023 24.21452 22.31644 26.11260 21.31166 27.11738
## Oct 2023 24.22696 22.18720 26.26672 21.10742 27.34650
## Nov 2023 24.23940 22.05902 26.41978 20.90479 27.57401
## Dec 2023 24.25184 21.93146 26.57222 20.70312 27.80056
Evaluation
Forecast model
Plot Final
autoplot(prediksi1) +
autolayer(tsData, series="Data Aktual", color="black") +
autolayer(test, series="Test Data", color="red") +
autolayer(train, series="Training Data", color="blue") +
ggtitle("Peramalan Holt Linear") +
ylab("Pembayaran BPJS") +
xlab("Waktu") +
scale_color_manual(values=c("black", "red", "blue"))Acurancy
## ME RMSE MAE MPE MAPE MASE
## Training set 0.07923921 0.4212558 0.3302507 0.3231357 1.427443 0.3925961
## Test set -0.69522111 0.7510177 0.6952211 -2.9742967 2.974297 0.8264662
## ACF1 Theil's U RMSE
## Training set 0.03020669 NA 0.1774565
## Test set 0.23733639 2.496072 0.5640276
Plot Comparison
dates <- seq(as.Date("2019-01-01"), by = "month", length.out = length(tsData))
df_actual <- data.frame(
waktu = dates,
nilai = as.numeric(tsData),
seri = "Data Aktual"
)
df_train <- data.frame(
waktu = dates[1:length(train)],
nilai = as.numeric(train),
seri = "Training Data"
)
df_test <- data.frame(
waktu = tail(dates, length(test)),
nilai = as.numeric(test),
seri = "Test Data"
)
df_pred_train <- data.frame(
waktu = dates[1:length(fitted(model1))],
nilai = as.numeric(fitted(model1)),
seri = "Forecast Training"
)
df_pred_test <- data.frame(
waktu = tail(dates, length(prediksi1$mean)),
nilai = as.numeric(prediksi1$mean),
seri = "Forecast Test"
)
df_future <- data.frame(
waktu = seq(max(dates) + 30, by = "month", length.out = length(forecast_futureee$mean)),
nilai = as.numeric(forecast_futureee$mean),
seri = "Forecast Future"
)
df_all <- bind_rows(df_actual, df_train, df_test,
df_pred_train, df_pred_test, df_future)
ggplot(df_all, aes(x = waktu, y = nilai, color = seri)) +
geom_line(size = 1.5) +
labs(title = "Peramalan Holt Linear Exponential Smoothing",
x = "Waktu", y = "Pembayaran BPJS") +
scale_color_manual(values = c(
"Data Aktual" = "black",
"Training Data" = "blue",
"Test Data" = "red",
"Forecast Training" = "orange",
"Forecast Test" = "green",
"Forecast Future" = "purple"
)) +
theme_minimal(base_size = 10)## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_line()`).