Exponential Smoothing Double BPJS

Exponential Smoothing Double

Package

library(forecast)
library(TTR)
library(graphics)
library(smooth)
## Loading required package: greybox
## Package "greybox", v2.0.8 loaded.
## This is package "smooth", v4.4.0
## 
## Attaching package: 'smooth'
## The following object is masked from 'package:TTR':
## 
##     lags
library(Mcomp)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::hm() masks greybox::hm()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ tidyr::spread() masks greybox::spread()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(fpp2)
## ── Attaching packages ──────────────────────────────────────────── fpp2 2.5.1 ──
## ✔ fma       2.5     ✔ expsmooth 2.3

Import Data

BPJS <- read.csv("C:/Users/kevin/Documents/LAPORAN PKL/DataBPJS READY/DATABPJS2019-2023NEW.csv",header = TRUE, row.names = 1)

Exploratory Data Analysis

BPJS$Waktu <- rownames(BPJS)

rownames(BPJS) <- NULL
BPJS$Waktu <- as.Date(BPJS$Waktu)  
colnames(BPJS)[1] <- "Pembayaran"  
head(BPJS)
##    Pembayaran      Waktu
## 1 30886897141 2019-01-01
## 2 26668399930 2019-02-01
## 3 24249489447 2019-03-01
## 4 20294330876 2019-04-01
## 5 20973792476 2019-05-01
## 6 20432072980 2019-06-01
str(BPJS)
## 'data.frame':    60 obs. of  2 variables:
##  $ Pembayaran: num  3.09e+10 2.67e+10 2.42e+10 2.03e+10 2.10e+10 ...
##  $ Waktu     : Date, format: "2019-01-01" "2019-02-01" ...
BPJS
##     Pembayaran      Waktu
## 1  30886897141 2019-01-01
## 2  26668399930 2019-02-01
## 3  24249489447 2019-03-01
## 4  20294330876 2019-04-01
## 5  20973792476 2019-05-01
## 6  20432072980 2019-06-01
## 7  27091069620 2019-07-01
## 8  26622161737 2019-08-01
## 9  23308081859 2019-09-01
## 10 29037231218 2019-10-01
## 11 25146786106 2019-11-01
## 12 26433345289 2019-12-01
## 13 28949995697 2020-01-01
## 14 29149853765 2020-02-01
## 15 27596946152 2020-03-01
## 16 16751943745 2020-04-01
## 17 18304214335 2020-05-01
## 18 34148012138 2020-06-01
## 19 38550408900 2020-07-01
## 20 27894931182 2020-08-01
## 21 30747995020 2020-09-01
## 22 24718776688 2020-10-01
## 23 17219932456 2020-11-01
## 24  8670469141 2020-12-01
## 25  6021389948 2021-01-01
## 26  3397573896 2021-02-01
## 27  4593792354 2021-03-01
## 28  8400940015 2021-04-01
## 29  5787310029 2021-05-01
## 30  6415138631 2021-06-01
## 31  2123977287 2021-07-01
## 32  3737131137 2021-08-01
## 33  7967293209 2021-09-01
## 34 10543311575 2021-10-01
## 35 17707198695 2021-11-01
## 36 10647000502 2021-12-01
## 37  6697464155 2022-01-01
## 38 15912486490 2022-02-01
## 39 15999436750 2022-03-01
## 40 13952847980 2022-04-01
## 41 11238150670 2022-05-01
## 42 15279343540 2022-06-01
## 43 12703788880 2022-07-01
## 44  8337575080 2022-08-01
## 45 11393311140 2022-09-01
## 46 27371007255 2022-10-01
## 47 32355702360 2022-11-01
## 48 29349703800 2022-12-01
## 49 26809350900 2023-01-01
## 50 55816909563 2023-02-01
## 51 13419968124 2023-03-01
## 52  8656045611 2023-04-01
## 53 13758874488 2023-05-01
## 54 18147594299 2023-06-01
## 55 14001133647 2023-07-01
## 56 16734196688 2023-08-01
## 57 13625340234 2023-09-01
## 58 16537596436 2023-10-01
## 59 20283793513 2023-11-01
## 60 14485444422 2023-12-01

Cek Missing Value

sapply(BPJS, function(x) sum(is.na(x)))
## Pembayaran      Waktu 
##          0          0

Cek Duplicated Data

any(duplicated(BPJS))
## [1] FALSE

Summary Data

summary(BPJS$Pembayaran)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 2.124e+09 1.109e+10 1.699e+10 1.873e+10 2.670e+10 5.582e+10

Preprocessing

Transform Data

BPJS.ts<-ts(BPJS$Pembayaran)
length(BPJS.ts)
## [1] 60
BPJS.ts.month<-ts(BPJS$Pembayaran,start = c(2019,1),frequency = 12)
BPJS.clean <- tsclean(BPJS.ts.month)
DataBPJS <- log(BPJS.clean)
summary(DataBPJS)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.48   23.13   23.56   23.46   24.01   24.38
BPJS$Waktu <- as.Date(BPJS$Waktu, format = "%Y-%m-%d")

Plot Data

autoplot(DataBPJS) +
  labs(title = "Pembayaran BPJS Bulanan (2019-2023)",
       x = "Tahun", y = "Jumlah Pembayaran (Rp)")

decomp <- decompose(DataBPJS, type = "additive") 
plot(decomp)

Convert Time Series

tsData <- ts(DataBPJS, frequency = 12)  
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
adf.test(tsData)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  tsData
## Dickey-Fuller = -2.0242, Lag order = 3, p-value = 0.565
## alternative hypothesis: stationary

Plot ACF

# Plot ACF
acf_result <- acf(tsData, main = "ACF Data BPJS", lag.max = 36)  

pacf(tsData, main = "PACF Data BPJS", lag.max = 36)

# Ambil nilai ACF
acf_values <- acf_result$acf
acf_values
## , , 1
## 
##               [,1]
##  [1,]  1.000000000
##  [2,]  0.806361702
##  [3,]  0.600206152
##  [4,]  0.476230378
##  [5,]  0.402306848
##  [6,]  0.379343599
##  [7,]  0.305574834
##  [8,]  0.234207820
##  [9,]  0.148046047
## [10,]  0.036603215
## [11,] -0.002054122
## [12,] -0.028832355
## [13,] -0.071023072
## [14,] -0.112661487
## [15,] -0.159238454
## [16,] -0.208302039
## [17,] -0.264703285
## [18,] -0.292749992
## [19,] -0.293995326
## [20,] -0.286136404
## [21,] -0.248002774
## [22,] -0.287093242
## [23,] -0.368284577
## [24,] -0.353303865
## [25,] -0.289247078
## [26,] -0.205709314
## [27,] -0.123125152
## [28,] -0.084709910
## [29,] -0.073456688
## [30,] -0.100488698
## [31,] -0.133648583
## [32,] -0.084572482
## [33,] -0.056695269
## [34,] -0.043696034
## [35,] -0.020960678
## [36,]  0.002020823
## [37,]  0.013745969
# Deteksi pola musiman (misalnya musiman tahunan = lag 12, 24, 36)
seasonal_lags <- seq(12, 36, 12)  
seasonal_pattern <- acf_values[seasonal_lags + 1]  # +1 karena lag 0 ikut dihitung
# Cek apakah ada korelasi signifikan di lag musiman
seasonal_pattern
## [1] -0.07102307 -0.28924708  0.01374597

Modelling

Splitting Data

train <- head(DataBPJS, -12)
str(train)
##  Time-Series [1:48] from 2019 to 2023: 24.2 24 23.9 23.7 23.8 ...
test  <- tail(DataBPJS, 12)
str(test)
##  Time-Series [1:12] from 2023 to 2024: 24 23.8 23.3 22.9 23.3 ...

Model Double

model1 <- HoltWinters(train, gamma = FALSE)
model1
## Holt-Winters exponential smoothing with trend and without seasonal component.
## 
## Call:
## HoltWinters(x = train, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 1
##  beta : 0.04370196
##  gamma: FALSE
## 
## Coefficients:
##          [,1]
## a 24.10254829
## b  0.01244111
#predict
prediksi1 <- forecast(model1, h = 12)
prediksi1
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2023       24.11499 23.57891 24.65107 23.29512 24.93486
## Feb 2023       24.12743 23.35255 24.90231 22.94235 25.31251
## Mar 2023       24.13987 23.17020 25.10954 22.65689 25.62285
## Apr 2023       24.15231 23.00866 25.29596 22.40325 25.90138
## May 2023       24.16475 22.85916 25.47035 22.16802 26.16149
## Jun 2023       24.17719 22.71731 25.63708 21.94449 26.40990
## Jul 2023       24.18964 22.58056 25.79871 21.72876 26.65051
## Aug 2023       24.20208 22.44730 25.95685 21.51838 26.88578
## Sep 2023       24.21452 22.31644 26.11260 21.31166 27.11738
## Oct 2023       24.22696 22.18720 26.26672 21.10742 27.34650
## Nov 2023       24.23940 22.05902 26.41978 20.90479 27.57401
## Dec 2023       24.25184 21.93146 26.57222 20.70312 27.80056

Plot Forecast

tsData <- ts(DataBPJS, start = c(2019,1), frequency = 12)

New Modelling

train <- window(tsData, start = c(2019,1), end = c(2022,12))
test  <- window(tsData, start = c(2023,1), end = c(2023,12))

# Model Holt linear
model1 <- HoltWinters(train, gamma = FALSE)
model2 <- HoltWinters(ts(c(train, test), frequency = 12), gamma = FALSE)

Evaluation

Forecast model

forecast_futureee <- forecast(model2, h = 24)
prediksi_test <- forecast(model1, h = length(test))
fitted_train <- fitted(model1)

Plot Final

autoplot(prediksi1) + 
  autolayer(tsData, series="Data Aktual", color="black") +
  autolayer(test, series="Test Data", color="red") + 
  autolayer(train, series="Training Data", color="blue") +
  ggtitle("Peramalan Holt Linear") +
  ylab("Pembayaran BPJS") + 
  xlab("Waktu") +
  scale_color_manual(values=c("black", "red", "blue"))

Acurancy

acc <- accuracy(prediksi1, test)
acc <- cbind(acc, RMSE = (acc[,"RMSE"])^2)
acc
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  0.07923921 0.4212558 0.3302507  0.3231357 1.427443 0.3925961
## Test set     -0.69522111 0.7510177 0.6952211 -2.9742967 2.974297 0.8264662
##                    ACF1 Theil's U      RMSE
## Training set 0.03020669        NA 0.1774565
## Test set     0.23733639  2.496072 0.5640276

Plot Comparison

dates <- seq(as.Date("2019-01-01"), by = "month", length.out = length(tsData))
df_actual <- data.frame(
  waktu = dates,
  nilai = as.numeric(tsData),
  seri = "Data Aktual"
)

df_train <- data.frame(
  waktu = dates[1:length(train)],
  nilai = as.numeric(train),
  seri = "Training Data"
)

df_test <- data.frame(
  waktu = tail(dates, length(test)),
  nilai = as.numeric(test),
  seri = "Test Data"
)

df_pred_train <- data.frame(
  waktu = dates[1:length(fitted(model1))],
  nilai = as.numeric(fitted(model1)),
  seri = "Forecast Training"
)

df_pred_test <- data.frame(
  waktu = tail(dates, length(prediksi1$mean)),
  nilai = as.numeric(prediksi1$mean),
  seri = "Forecast Test"
)

df_future <- data.frame(
  waktu = seq(max(dates) + 30, by = "month", length.out = length(forecast_futureee$mean)),
  nilai = as.numeric(forecast_futureee$mean),
  seri = "Forecast Future"
)
df_all <- bind_rows(df_actual, df_train, df_test,
                    df_pred_train, df_pred_test, df_future)


ggplot(df_all, aes(x = waktu, y = nilai, color = seri)) +
  geom_line(size = 1.5) +
  labs(title = "Peramalan Holt Linear Exponential Smoothing",
       x = "Waktu", y = "Pembayaran BPJS") +
  scale_color_manual(values = c(
    "Data Aktual" = "black",
    "Training Data" = "blue",
    "Test Data" = "red",
    "Forecast Training" = "orange",
    "Forecast Test" = "green",
    "Forecast Future" = "purple"
  )) +
  theme_minimal(base_size = 10)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 78 rows containing missing values or values outside the scale range
## (`geom_line()`).