1 Library

library("tseries") 
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library("forecast") 
library("TTR") 
library("TSA") 
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library("graphics")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")

2 Data

Data yang digunakan merupakan data jumlah rujukan FKTP ke FKRTL BPJS Kesehatan cabang Yogyakarta dari januari 2020 - September 2025.

kasus <- read.csv(file = "D:/Kuliah/Magang/PKL 2025/BPJS kes/Laporan/data/kasus.csv")
kasus$Jumlah <- gsub(",", "", kasus$Jumlah)   
kasus$Jumlah <- trimws(kasus$Jumlah)          
kasus$Jumlah <- as.numeric(kasus$Jumlah)    
str(kasus)
## 'data.frame':    69 obs. of  2 variables:
##  $ Bulan : chr  "2020-01" "2020-02" "2020-03" "2020-04" ...
##  $ Jumlah: num  113720 111248 103755 71973 63245 ...
summary(kasus)
##     Bulan               Jumlah      
##  Length:69          Min.   : 63245  
##  Class :character   1st Qu.:101158  
##  Mode  :character   Median :138302  
##                     Mean   :142507  
##                     3rd Qu.:185192  
##                     Max.   :213899
data.ts <- ts(kasus$Jumlah,start = c(2020, 1),frequency = 12)

plot(data.ts, main = "Jumlah Kasus", ylab = "Total", xlab = "Periode")

2.1 Partisi Data

n <- nrow(kasus)
train_size <- floor(0.7 * n)

data.training <- kasus[1:train_size, ]
data.test  <- kasus[(train_size + 1):n, ]
data.training.ts <- ts(
  data.training$Jumlah,
  start = c(2020, 1),
  frequency = 12
)

data.test.ts <- ts(
  data.test$Jumlah,
  start = c(2020, 1 + train_size),
  frequency = 12
)
data.training
data.test
# data training
plot.ts(
  data.training$Jumlah,
  main = "Data Training Kasus",
  xlab = "Bulan",
  ylab = "Jumlah"
)

plot.ts(
  data.test$Jumlah,
  main = "Data Testing Kasus",
  xlab = "Bulan",
  ylab = "Jumlah"
)

Berdasarkan plot deret waktu diatas, terlihat bahwa data training dan testing cenderung memiliki pola trend.

3 Stasioneritas Data

3.1 Plot ACF

acf(
  data.training.ts,
  main = "Plot ACF Data Training Kasus"
)

Plot ACF memperlihatkan pola penurunan autokorelasi yang berlangsung secara perlahan, yang menjadi indikasi bahwa data tidak dalam kondisi stasioner.

adf.test(data.training.ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.training.ts
## Dickey-Fuller = -2.7523, Lag order = 3, p-value = 0.2737
## alternative hypothesis: stationary

Nilai p-value sebesar 0.2737 yang lebih tinggi dari tingkat signifikansi 0.05 mengindikasikan bahwa H0 tidak ditolak, sehingga data belum stasioneritas.

3.2 Differencing

# Differencing Ordo 1
data.dif1 <- diff(data.training.ts, difference=1)
plot.ts(data.dif1, lty=1, xlab="Waktu", ylab="Data Penggna  Diff Ordo 1")
points(data.dif1)

# Cek Kestasioneran
acf(data.dif1, main="Plot ACF Data kasus  setelah Diff 1", lag.max = 10)

adf.test(data.dif1) # Augmented Dickey-Fuller
## Warning in adf.test(data.dif1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data.dif1
## Dickey-Fuller = -6.2336, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary

Nilai p-value sebesar 0.01 yang lebih kecil dari 0.05 menunjukkan bahwa H0 ditolak, sehingga deret waktu pada diferensiasi pertama dapat dikatakan telah stasioner.

pacf(data.dif1, main="Plot PACF Data kasus ", lag.max=10) 

eacf(data.dif1, ar.max = 10,ma.max = 10)
## AR/MA
##    0 1 2 3 4 5 6 7 8 9 10
## 0  x o o o o o o o o o o 
## 1  o o o o o o o o o o o 
## 2  x o o o o o o o o o o 
## 3  o o o o o o o o o o o 
## 4  x o o o o o o o o o o 
## 5  x o o o o o o o o o o 
## 6  x o o o o o o o o o o 
## 7  x o o o o o o o o o o 
## 8  x o o o o o o o o o o 
## 9  x x x o o o o o o o o 
## 10 x o o o o o o o o o o

4 Kandidat Model

  1. ARIMA (1,1,1)
  2. ARIMA (0,1,1)
  3. ARIMA (0,1,2)
  4. ARIMA (1,1,2)
  5. ARIMA (2,1,1)
arima1<-arima(data.dif1, order=c(1,0,1),include.mean=FALSE, method="ML") ###ARIMA (1,1,1)
arima2<-arima(data.dif1, order=c(0,0,1),include.mean=FALSE, method="ML") ###ARIMA (0,1,1)
arima3<-arima(data.dif1, order=c(0,0,2),include.mean=FALSE, method="ML") ###ARIMA (0,1,2)
arima4<-arima(data.dif1, order=c(1,0,2),include.mean=FALSE, method="ML") ###ARIMA (1,1,2)
arima5<-arima(data.dif1, order = c(2,0,1), include.mean = FALSE, method = "ML") ###ARIMA (2,1,1)

5 Uji Signifikansi Parameter

arima1
## 
## Call:
## arima(x = data.dif1, order = c(1, 0, 1), include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ar1     ma1
##       -0.4503  0.1710
## s.e.   0.3437  0.3716
## 
## sigma^2 estimated as 134791258:  log likelihood = -506.64,  aic = 1017.28
arima2
## 
## Call:
## arima(x = data.dif1, order = c(0, 0, 1), include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ma1
##       -0.2582
## s.e.   0.1344
## 
## sigma^2 estimated as 137353933:  log likelihood = -507.07,  aic = 1016.14
arima3
## 
## Call:
## arima(x = data.dif1, order = c(0, 0, 2), include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ma1     ma2
##       -0.2449  0.1556
## s.e.   0.1422  0.1806
## 
## sigma^2 estimated as 135179959:  log likelihood = -506.71,  aic = 1017.41
arima4
## 
## Call:
## arima(x = data.dif1, order = c(1, 0, 2), include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ar1     ma1     ma2
##       -0.2973  0.0344  0.0994
## s.e.   0.5237  0.5098  0.2363
## 
## sigma^2 estimated as 134314264:  log likelihood = -506.56,  aic = 1019.13
arima5
## 
## Call:
## arima(x = data.dif1, order = c(2, 0, 1), include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ar1     ar2      ma1
##       -0.2709  0.0691  -0.0018
## s.e.   0.8788  0.2942   0.8701
## 
## sigma^2 estimated as 134657490:  log likelihood = -506.62,  aic = 1019.24

5.1 AIC

aic.arima <- data.frame(
  Model = c("ARIMA(1,1,1)", "ARIMA(2,1,1)", 
            "ARIMA(1,1,2)", "ARIMA(2,1,2)", "ARIMA(0,1,1)"),
  AIC = c(arima1$aic, arima2$aic, arima3$aic, arima4$aic, arima5$aic)
)

aic.arima

Model terbaik adalah ARIMA (2,1,1) karena memiliki AIC terendah (1016.139 )

6 Eksploratif

arima.y<-arima(data.training.ts,  order=c(2,1,1), include.mean = FALSE,method="ML") ###ARIMA (2,1,1)
sisaan <- arima.y$residuals

# Eksplorasi
par(mfrow=c(2,2))
qqnorm(sisaan)
qqline(sisaan, col = "blue", lwd = 2)
plot(c(1:length(sisaan)),sisaan)
acf(sisaan,lag.max = 10)
pacf(sisaan,lag.max = 10)

Normalitas: Normal Q-Q Plot menunjukkan bahwa sisaan mengikuti sebaran normal karena titik-titik berada di sekitar garis. ACF dan PACF tidak menunjukkan adanya lag yang signifikan, menunjukkan bahwa tidak ada autokorelasi pada residual.

6.1 Uji Non-Autokorelasi dengan Ljung-Box test

Box.test(sisaan, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  sisaan
## X-squared = 0.039794, df = 1, p-value = 0.8419

Berdasarkan hasil uji Ljung-Box di atas dapat disimpulkan bahwa H0 tidak ditolak karena p-value= 0.8419 > 0.05 yang berarti tidak terdapat gejala autokorelasi pada sisaan dari model ARIMA (2,1,1) tidak berkorelasi.

6.2 Uji Normalitas dengan Shapiro-Wilk test

shapiro.test(sisaan)
## 
##  Shapiro-Wilk normality test
## 
## data:  sisaan
## W = 0.95298, p-value = 0.05262

Berdasarkan hasil uji Shapiro-Wilk di atas dapat disimpulkan bahwa h0 tidak ditolak karena p-value = 0.05262 > 0.05 sehingga sisaan dari model ARIMA (2,1,1) mengikuti sebaran Normal.

#plot dugaan dengan data asli
dugaan <- fitted(arima.y)
cbind(data.training.ts,dugaan)
##          data.training.ts    dugaan
## Jan 2020           113720 113606.28
## Feb 2020           111248 113606.08
## Mar 2020           103755 111952.35
## Apr 2020            71973 105628.53
## May 2020            63245  80124.72
## Jun 2020            81201  63442.24
## Jul 2020            91184  75701.66
## Aug 2020            88649  89693.41
## Sep 2020            93786  90027.77
## Oct 2020            95848  92212.42
## Nov 2020            96790  95638.07
## Dec 2020            92974  96675.31
## Jan 2021            90835  94079.48
## Feb 2021            86823  91156.42
## Mar 2021           103847  87769.70
## Apr 2021           101158  98929.16
## May 2021            94907 103059.45
## Jun 2021           100407  96429.02
## Jul 2021            77208  98477.79
## Aug 2021            83027  83910.78
## Sep 2021            97551  79848.33
## Oct 2021           103294  93987.20
## Nov 2021           109700 102725.75
## Dec 2021           117707 108349.22
## Jan 2022           119978 115964.10
## Feb 2022           101012 119909.20
## Mar 2022           116623 106340.59
## Apr 2022           115877 111064.42
## May 2022           111053 117149.80
## Jun 2022           124885 112319.11
## Jul 2022           124704 120782.00
## Aug 2022           138302 125702.33
## Sep 2022           137590 134583.32
## Oct 2022           137664 138717.63
## Nov 2022           141651 137596.60
## Dec 2022           145448 140568.81
## Jan 2023           151496 144686.34
## Feb 2023           139225 150107.96
## Mar 2023           157815 142986.75
## Apr 2023           128882 151904.16
## May 2023           160169 138046.24
## Jun 2023           150057 149653.59
## Jul 2023           165814 154958.69
## Aug 2023           177506 160826.96
## Sep 2023           174467 175398.28
## Oct 2023           185192 176100.25
## Nov 2023           183230 182060.28
## Dec 2023           176216 184500.90
plot.ts(data.training.ts, xlab="Month", ylab="Data")
points(data.training.ts)
par(col="red")
lines(dugaan)

par(col="black")

7 Ramalan

# forecast
h <- length(data.test.ts)

ramalan_arima <- forecast::forecast(
  data.training.ts,
  model = arima.y,
  h = h
)

fc <- ramalan_arima
fc
##          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2024       177995.2 163123.8 192866.6 155251.40 200739.0
## Feb 2024       177028.3 158639.5 195417.1 148905.05 205151.6
## Mar 2024       177413.3 154926.2 199900.3 143022.29 211804.2
## Apr 2024       177242.1 151712.8 202771.5 138198.33 216285.9
## May 2024       177315.1 148904.9 205725.2 133865.53 220764.7
## Jun 2024       177283.5 146323.6 208243.4 129934.44 224632.5
## Jul 2024       177297.1 143956.1 210638.1 126306.47 228287.7
## Aug 2024       177291.2 141738.7 212843.8 122918.34 231664.1
## Sep 2024       177293.8 139655.2 214932.4 119730.48 234857.0
## Oct 2024       177292.7 137679.4 216905.9 116709.45 237875.9
## Nov 2024       177293.1 135798.4 218787.9 113832.44 240753.8
## Dec 2024       177292.9 133998.7 220587.2 111080.13 243505.7
## Jan 2025       177293.0 132271.0 222315.0 108437.84 246148.2
## Feb 2025       177293.0 130607.2 223978.8 105893.23 248692.7
## Mar 2025       177293.0 129000.7 225585.3 103436.25 251149.8
## Apr 2025       177293.0 127445.9 227140.1 101058.41 253527.6
## May 2025       177293.0 125938.1 228647.9  98752.53 255833.5
## Jun 2025       177293.0 124473.4 230112.6  96512.45 258073.6
## Jul 2025       177293.0 123048.3 231537.7  94332.82 260253.2
## Aug 2025       177293.0 121659.6 232926.4  92209.02 262377.0
## Sep 2025       177293.0 120304.7 234281.3  90136.95 264449.0
plot(fc)

forecast_mean <- as.numeric(fc$mean)
lo80 <- as.numeric(fc$lower[,"80%"])
hi80 <- as.numeric(fc$upper[,"80%"])
lo95 <- as.numeric(fc$lower[,"95%"])
hi95 <- as.numeric(fc$upper[,"95%"])

gabungan <- data.frame(
  Aktual = as.numeric(data.test.ts),
  Ramalan = forecast_mean,
  Lo80 = lo80,
  Hi80 = hi80,
  Lo95 = lo95,
  Hi95 = hi95
)

gabungan
accuracy(ramalan_arima, data.test.ts) #arima
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set  1598.616 11482.70  8802.335 0.3598824 8.287991 0.3235122
## Test set     17789.745 20548.98 17789.745 8.8659687 8.865969 0.6538264
##                     ACF1 Theil's U
## Training set -0.02791608        NA
## Test set      0.21146120  1.557217
#forecast
(ramalan_arima<- forecast::forecast(data.ts,model=arima.y, h=12))
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Oct 2025       203494.2 188622.8 218365.6 180750.4 226238.0
## Nov 2025       204243.3 185854.4 222632.1 176120.0 232366.5
## Dec 2025       203921.2 181434.2 226408.3 169530.3 238312.2
## Jan 2026       204060.3 178530.9 229589.6 165016.5 243104.1
## Feb 2026       204000.3 175590.2 232410.5 160550.8 247449.9
## Mar 2026       204026.2 173066.3 234986.1 156677.1 251375.2
## Apr 2026       204015.0 170674.0 237356.0 153024.4 255005.7
## May 2026       204019.8 168467.3 239572.4 149646.9 258392.7
## Jun 2026       204017.8 166379.2 241656.4 146454.5 261581.0
## Jul 2026       204018.7 164405.4 243631.9 143435.4 264601.9
## Aug 2026       204018.3 162523.5 245513.0 140557.6 267479.0
## Sep 2026       204018.4 160724.2 247312.7 137805.6 270231.2