## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Data yang digunakan merupakan data jumlah rujukan FKTP ke FKRTL BPJS Kesehatan cabang Yogyakarta dari januari 2020 - September 2025.
kasus <- read.csv(file = "D:/Kuliah/Magang/PKL 2025/BPJS kes/Laporan/data/kasus.csv")
kasus$Jumlah <- gsub(",", "", kasus$Jumlah)
kasus$Jumlah <- trimws(kasus$Jumlah)
kasus$Jumlah <- as.numeric(kasus$Jumlah)
str(kasus)## 'data.frame': 69 obs. of 2 variables:
## $ Bulan : chr "2020-01" "2020-02" "2020-03" "2020-04" ...
## $ Jumlah: num 113720 111248 103755 71973 63245 ...
## Bulan Jumlah
## Length:69 Min. : 63245
## Class :character 1st Qu.:101158
## Mode :character Median :138302
## Mean :142507
## 3rd Qu.:185192
## Max. :213899
data.ts <- ts(kasus$Jumlah,start = c(2020, 1),frequency = 12)
plot(data.ts, main = "Jumlah Kasus", ylab = "Total", xlab = "Periode")n <- nrow(kasus)
train_size <- floor(0.7 * n)
data.training <- kasus[1:train_size, ]
data.test <- kasus[(train_size + 1):n, ]data.training.ts <- ts(
data.training$Jumlah,
start = c(2020, 1),
frequency = 12
)
data.test.ts <- ts(
data.test$Jumlah,
start = c(2020, 1 + train_size),
frequency = 12
)# data training
plot.ts(
data.training$Jumlah,
main = "Data Training Kasus",
xlab = "Bulan",
ylab = "Jumlah"
)Berdasarkan plot deret waktu diatas, terlihat bahwa data training dan testing cenderung memiliki pola trend.
Plot ACF memperlihatkan pola penurunan autokorelasi yang berlangsung
secara perlahan, yang menjadi indikasi bahwa data tidak dalam kondisi
stasioner.
##
## Augmented Dickey-Fuller Test
##
## data: data.training.ts
## Dickey-Fuller = -2.7523, Lag order = 3, p-value = 0.2737
## alternative hypothesis: stationary
Nilai p-value sebesar 0.2737 yang lebih tinggi dari tingkat signifikansi 0.05 mengindikasikan bahwa H0 tidak ditolak, sehingga data belum stasioneritas.
# Differencing Ordo 1
data.dif1 <- diff(data.training.ts, difference=1)
plot.ts(data.dif1, lty=1, xlab="Waktu", ylab="Data Penggna Diff Ordo 1")
points(data.dif1)## Warning in adf.test(data.dif1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data.dif1
## Dickey-Fuller = -6.2336, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
Nilai p-value sebesar 0.01 yang lebih kecil dari 0.05 menunjukkan bahwa H0 ditolak, sehingga deret waktu pada diferensiasi pertama dapat dikatakan telah stasioner.
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10
## 0 x o o o o o o o o o o
## 1 o o o o o o o o o o o
## 2 x o o o o o o o o o o
## 3 o o o o o o o o o o o
## 4 x o o o o o o o o o o
## 5 x o o o o o o o o o o
## 6 x o o o o o o o o o o
## 7 x o o o o o o o o o o
## 8 x o o o o o o o o o o
## 9 x x x o o o o o o o o
## 10 x o o o o o o o o o o
arima1<-arima(data.dif1, order=c(1,0,1),include.mean=FALSE, method="ML") ###ARIMA (1,1,1)
arima2<-arima(data.dif1, order=c(0,0,1),include.mean=FALSE, method="ML") ###ARIMA (0,1,1)
arima3<-arima(data.dif1, order=c(0,0,2),include.mean=FALSE, method="ML") ###ARIMA (0,1,2)
arima4<-arima(data.dif1, order=c(1,0,2),include.mean=FALSE, method="ML") ###ARIMA (1,1,2)
arima5<-arima(data.dif1, order = c(2,0,1), include.mean = FALSE, method = "ML") ###ARIMA (2,1,1)##
## Call:
## arima(x = data.dif1, order = c(1, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ma1
## -0.4503 0.1710
## s.e. 0.3437 0.3716
##
## sigma^2 estimated as 134791258: log likelihood = -506.64, aic = 1017.28
##
## Call:
## arima(x = data.dif1, order = c(0, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ma1
## -0.2582
## s.e. 0.1344
##
## sigma^2 estimated as 137353933: log likelihood = -507.07, aic = 1016.14
##
## Call:
## arima(x = data.dif1, order = c(0, 0, 2), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ma1 ma2
## -0.2449 0.1556
## s.e. 0.1422 0.1806
##
## sigma^2 estimated as 135179959: log likelihood = -506.71, aic = 1017.41
##
## Call:
## arima(x = data.dif1, order = c(1, 0, 2), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ma1 ma2
## -0.2973 0.0344 0.0994
## s.e. 0.5237 0.5098 0.2363
##
## sigma^2 estimated as 134314264: log likelihood = -506.56, aic = 1019.13
##
## Call:
## arima(x = data.dif1, order = c(2, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ar2 ma1
## -0.2709 0.0691 -0.0018
## s.e. 0.8788 0.2942 0.8701
##
## sigma^2 estimated as 134657490: log likelihood = -506.62, aic = 1019.24
arima.y<-arima(data.training.ts, order=c(2,1,1), include.mean = FALSE,method="ML") ###ARIMA (2,1,1)
sisaan <- arima.y$residuals
# Eksplorasi
par(mfrow=c(2,2))
qqnorm(sisaan)
qqline(sisaan, col = "blue", lwd = 2)
plot(c(1:length(sisaan)),sisaan)
acf(sisaan,lag.max = 10)
pacf(sisaan,lag.max = 10)Normalitas: Normal Q-Q Plot menunjukkan bahwa sisaan mengikuti sebaran normal karena titik-titik berada di sekitar garis. ACF dan PACF tidak menunjukkan adanya lag yang signifikan, menunjukkan bahwa tidak ada autokorelasi pada residual.
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 0.039794, df = 1, p-value = 0.8419
Berdasarkan hasil uji Ljung-Box di atas dapat disimpulkan bahwa H0 tidak ditolak karena p-value= 0.8419 > 0.05 yang berarti tidak terdapat gejala autokorelasi pada sisaan dari model ARIMA (2,1,1) tidak berkorelasi.
##
## Shapiro-Wilk normality test
##
## data: sisaan
## W = 0.95298, p-value = 0.05262
Berdasarkan hasil uji Shapiro-Wilk di atas dapat disimpulkan bahwa h0 tidak ditolak karena p-value = 0.05262 > 0.05 sehingga sisaan dari model ARIMA (2,1,1) mengikuti sebaran Normal.
## data.training.ts dugaan
## Jan 2020 113720 113606.28
## Feb 2020 111248 113606.08
## Mar 2020 103755 111952.35
## Apr 2020 71973 105628.53
## May 2020 63245 80124.72
## Jun 2020 81201 63442.24
## Jul 2020 91184 75701.66
## Aug 2020 88649 89693.41
## Sep 2020 93786 90027.77
## Oct 2020 95848 92212.42
## Nov 2020 96790 95638.07
## Dec 2020 92974 96675.31
## Jan 2021 90835 94079.48
## Feb 2021 86823 91156.42
## Mar 2021 103847 87769.70
## Apr 2021 101158 98929.16
## May 2021 94907 103059.45
## Jun 2021 100407 96429.02
## Jul 2021 77208 98477.79
## Aug 2021 83027 83910.78
## Sep 2021 97551 79848.33
## Oct 2021 103294 93987.20
## Nov 2021 109700 102725.75
## Dec 2021 117707 108349.22
## Jan 2022 119978 115964.10
## Feb 2022 101012 119909.20
## Mar 2022 116623 106340.59
## Apr 2022 115877 111064.42
## May 2022 111053 117149.80
## Jun 2022 124885 112319.11
## Jul 2022 124704 120782.00
## Aug 2022 138302 125702.33
## Sep 2022 137590 134583.32
## Oct 2022 137664 138717.63
## Nov 2022 141651 137596.60
## Dec 2022 145448 140568.81
## Jan 2023 151496 144686.34
## Feb 2023 139225 150107.96
## Mar 2023 157815 142986.75
## Apr 2023 128882 151904.16
## May 2023 160169 138046.24
## Jun 2023 150057 149653.59
## Jul 2023 165814 154958.69
## Aug 2023 177506 160826.96
## Sep 2023 174467 175398.28
## Oct 2023 185192 176100.25
## Nov 2023 183230 182060.28
## Dec 2023 176216 184500.90
plot.ts(data.training.ts, xlab="Month", ylab="Data")
points(data.training.ts)
par(col="red")
lines(dugaan)# forecast
h <- length(data.test.ts)
ramalan_arima <- forecast::forecast(
data.training.ts,
model = arima.y,
h = h
)
fc <- ramalan_arima
fc## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2024 177995.2 163123.8 192866.6 155251.40 200739.0
## Feb 2024 177028.3 158639.5 195417.1 148905.05 205151.6
## Mar 2024 177413.3 154926.2 199900.3 143022.29 211804.2
## Apr 2024 177242.1 151712.8 202771.5 138198.33 216285.9
## May 2024 177315.1 148904.9 205725.2 133865.53 220764.7
## Jun 2024 177283.5 146323.6 208243.4 129934.44 224632.5
## Jul 2024 177297.1 143956.1 210638.1 126306.47 228287.7
## Aug 2024 177291.2 141738.7 212843.8 122918.34 231664.1
## Sep 2024 177293.8 139655.2 214932.4 119730.48 234857.0
## Oct 2024 177292.7 137679.4 216905.9 116709.45 237875.9
## Nov 2024 177293.1 135798.4 218787.9 113832.44 240753.8
## Dec 2024 177292.9 133998.7 220587.2 111080.13 243505.7
## Jan 2025 177293.0 132271.0 222315.0 108437.84 246148.2
## Feb 2025 177293.0 130607.2 223978.8 105893.23 248692.7
## Mar 2025 177293.0 129000.7 225585.3 103436.25 251149.8
## Apr 2025 177293.0 127445.9 227140.1 101058.41 253527.6
## May 2025 177293.0 125938.1 228647.9 98752.53 255833.5
## Jun 2025 177293.0 124473.4 230112.6 96512.45 258073.6
## Jul 2025 177293.0 123048.3 231537.7 94332.82 260253.2
## Aug 2025 177293.0 121659.6 232926.4 92209.02 262377.0
## Sep 2025 177293.0 120304.7 234281.3 90136.95 264449.0
forecast_mean <- as.numeric(fc$mean)
lo80 <- as.numeric(fc$lower[,"80%"])
hi80 <- as.numeric(fc$upper[,"80%"])
lo95 <- as.numeric(fc$lower[,"95%"])
hi95 <- as.numeric(fc$upper[,"95%"])
gabungan <- data.frame(
Aktual = as.numeric(data.test.ts),
Ramalan = forecast_mean,
Lo80 = lo80,
Hi80 = hi80,
Lo95 = lo95,
Hi95 = hi95
)
gabungan## ME RMSE MAE MPE MAPE MASE
## Training set 1598.616 11482.70 8802.335 0.3598824 8.287991 0.3235122
## Test set 17789.745 20548.98 17789.745 8.8659687 8.865969 0.6538264
## ACF1 Theil's U
## Training set -0.02791608 NA
## Test set 0.21146120 1.557217
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2025 203494.2 188622.8 218365.6 180750.4 226238.0
## Nov 2025 204243.3 185854.4 222632.1 176120.0 232366.5
## Dec 2025 203921.2 181434.2 226408.3 169530.3 238312.2
## Jan 2026 204060.3 178530.9 229589.6 165016.5 243104.1
## Feb 2026 204000.3 175590.2 232410.5 160550.8 247449.9
## Mar 2026 204026.2 173066.3 234986.1 156677.1 251375.2
## Apr 2026 204015.0 170674.0 237356.0 153024.4 255005.7
## May 2026 204019.8 168467.3 239572.4 149646.9 258392.7
## Jun 2026 204017.8 166379.2 241656.4 146454.5 261581.0
## Jul 2026 204018.7 164405.4 243631.9 143435.4 264601.9
## Aug 2026 204018.3 162523.5 245513.0 140557.6 267479.0
## Sep 2026 204018.4 160724.2 247312.7 137805.6 270231.2