library("tseries")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("forecast")
library("TTR")
library("TSA")
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library("graphics")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
Data yang digunakan merupakan data jumlah rujukan FKTP ke FKRTL BPJS Kesehatan cabang Yogyakarta dari januari 2020-September 2025.
kasus <- read.csv(file = "D:/Kuliah/Magang/PKL 2025/BPJS kes/Laporan/data/kasus.csv")
kasus$Jumlah <- gsub(",", "", kasus$Jumlah)
kasus$Jumlah <- trimws(kasus$Jumlah)
kasus$Jumlah <- as.numeric(kasus$Jumlah)
str(kasus)
## 'data.frame': 69 obs. of 2 variables:
## $ Bulan : chr "2020-01" "2020-02" "2020-03" "2020-04" ...
## $ Jumlah: num 113720 111248 103755 71973 63245 ...
summary(kasus)
## Bulan Jumlah
## Length:69 Min. : 63245
## Class :character 1st Qu.:101158
## Mode :character Median :138302
## Mean :142507
## 3rd Qu.:185192
## Max. :213899
data.ts <- ts(kasus$Jumlah,start = c(2020, 1),frequency = 12)
plot(data.ts, main = "Jumlah Kasus", ylab = "Total", xlab = "Periode")
n <- nrow(kasus)
train_size <- floor(0.7 * n)
data.training <- kasus[1:train_size, ]
data.test <- kasus[(train_size + 1):n, ]
data.training.ts <- ts(
data.training$Jumlah,
start = c(2020, 1),
frequency = 12
)
data.test.ts <- ts(
data.test$Jumlah,
start = c(2020, 1 + train_size),
frequency = 12
)
data.training
## Bulan Jumlah
## 1 2020-01 113720
## 2 2020-02 111248
## 3 2020-03 103755
## 4 2020-04 71973
## 5 2020-05 63245
## 6 2020-06 81201
## 7 2020-07 91184
## 8 2020-08 88649
## 9 2020-09 93786
## 10 2020-10 95848
## 11 2020-11 96790
## 12 2020-12 92974
## 13 2021-01 90835
## 14 2021-02 86823
## 15 2021-03 103847
## 16 2021-04 101158
## 17 2021-05 94907
## 18 2021-06 100407
## 19 2021-07 77208
## 20 2021-08 83027
## 21 2021-09 97551
## 22 2021-10 103294
## 23 2021-11 109700
## 24 2021-12 117707
## 25 2022-01 119978
## 26 2022-02 101012
## 27 2022-03 116623
## 28 2022-04 115877
## 29 2022-05 111053
## 30 2022-06 124885
## 31 2022-07 124704
## 32 2022-08 138302
## 33 2022-09 137590
## 34 2022-10 137664
## 35 2022-11 141651
## 36 2022-12 145448
## 37 2023-01 151496
## 38 2023-02 139225
## 39 2023-03 157815
## 40 2023-04 128882
## 41 2023-05 160169
## 42 2023-06 150057
## 43 2023-07 165814
## 44 2023-08 177506
## 45 2023-09 174467
## 46 2023-10 185192
## 47 2023-11 183230
## 48 2023-12 176216
data.test
## Bulan Jumlah
## 49 2024-01 189233
## 50 2024-02 178354
## 51 2024-03 186960
## 52 2024-04 179445
## 53 2024-05 198102
## 54 2024-06 182709
## 55 2024-07 211162
## 56 2024-08 205278
## 57 2024-09 193871
## 58 2024-10 213899
## 59 2024-11 200299
## 60 2024-12 199279
## 61 2025-01 200965
## 62 2025-02 190526
## 63 2025-03 187920
## 64 2025-04 186841
## 65 2025-05 192122
## 66 2025-06 183892
## 67 2025-07 210053
## 68 2025-08 201133
## 69 2025-09 205217
# data training
plot.ts(
data.training$Jumlah,
main = "Data Training Kasus",
xlab = "Bulan",
ylab = "Jumlah"
)
plot.ts(
data.test$Jumlah,
main = "Data Testing Kasus",
xlab = "Bulan",
ylab = "Jumlah"
)
Berdasarkan plot deret waktu diatas, terlihat bahwa data training dan testing cenderung memiliki pola trend.
acf(
data.training.ts,
main = "Plot ACF Data Training Kasus"
)
Plot ACF memperlihatkan pola penurunan autokorelasi yang berlangsung
secara perlahan, yang menjadi indikasi bahwa data tidak dalam kondisi
stasioner.
adf.test(data.training.ts)
##
## Augmented Dickey-Fuller Test
##
## data: data.training.ts
## Dickey-Fuller = -2.7523, Lag order = 3, p-value = 0.2737
## alternative hypothesis: stationary
Nilai p-value sebesar 0.2737 yang lebih tinggi dari tingkat signifikansi 0.05 mengindikasikan bahwa H0 tidak ditolak, sehingga data belum stasioneritas.
# Differencing Ordo 1
data.dif1 <- diff(data.training.ts, difference=1)
plot.ts(data.dif1, lty=1, xlab="Waktu", ylab="Data Penggna Diff Ordo 1")
points(data.dif1)
# Cek Kestasioneran
acf(data.dif1, main="Plot ACF Data kasus setelah Diff 1", lag.max = 10)
adf.test(data.dif1) # Augmented Dickey-Fuller
## Warning in adf.test(data.dif1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: data.dif1
## Dickey-Fuller = -6.2336, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
Nilai p-value sebesar 0.01 yang lebih kecil dari 0.05 menunjukkan bahwa H0 ditolak, sehingga deret waktu pada diferensiasi pertama dapat dikatakan telah stasioner.
pacf(data.dif1, main="Plot PACF Data kasus ", lag.max=10)
eacf(data.dif1, ar.max = 10,ma.max = 10)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10
## 0 x o o o o o o o o o o
## 1 o o o o o o o o o o o
## 2 x o o o o o o o o o o
## 3 o o o o o o o o o o o
## 4 x o o o o o o o o o o
## 5 x o o o o o o o o o o
## 6 x o o o o o o o o o o
## 7 x o o o o o o o o o o
## 8 x o o o o o o o o o o
## 9 x x x o o o o o o o o
## 10 x o o o o o o o o o o
arima1<-arima(data.dif1, order=c(1,0,1),include.mean=FALSE, method="ML") ###ARIMA (1,1,1)
arima2<-arima(data.dif1, order=c(0,0,1),include.mean=FALSE, method="ML") ###ARIMA (0,1,1)
arima3<-arima(data.dif1, order=c(0,0,2),include.mean=FALSE, method="ML") ###ARIMA (0,1,2)
arima4<-arima(data.dif1, order=c(1,0,2),include.mean=FALSE, method="ML") ###ARIMA (1,1,2)
arima5<-arima(data.dif1, order = c(2,0,1), include.mean = FALSE, method = "ML") ###ARIMA (2,1,1)
arima1
##
## Call:
## arima(x = data.dif1, order = c(1, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ma1
## -0.4503 0.1710
## s.e. 0.3437 0.3716
##
## sigma^2 estimated as 134791258: log likelihood = -506.64, aic = 1017.28
arima2
##
## Call:
## arima(x = data.dif1, order = c(0, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ma1
## -0.2582
## s.e. 0.1344
##
## sigma^2 estimated as 137353933: log likelihood = -507.07, aic = 1016.14
arima3
##
## Call:
## arima(x = data.dif1, order = c(0, 0, 2), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ma1 ma2
## -0.2449 0.1556
## s.e. 0.1422 0.1806
##
## sigma^2 estimated as 135179959: log likelihood = -506.71, aic = 1017.41
arima4
##
## Call:
## arima(x = data.dif1, order = c(1, 0, 2), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ma1 ma2
## -0.2973 0.0344 0.0994
## s.e. 0.5237 0.5098 0.2363
##
## sigma^2 estimated as 134314264: log likelihood = -506.56, aic = 1019.13
arima5
##
## Call:
## arima(x = data.dif1, order = c(2, 0, 1), include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ar2 ma1
## -0.2709 0.0691 -0.0018
## s.e. 0.8788 0.2942 0.8701
##
## sigma^2 estimated as 134657490: log likelihood = -506.62, aic = 1019.24
aic.arima <- data.frame(
Model = c("ARIMA(1,1,1)", "ARIMA(2,1,1)",
"ARIMA(1,1,2)", "ARIMA(2,1,2)", "ARIMA(0,1,1)"),
AIC = c(arima1$aic, arima2$aic, arima3$aic, arima4$aic, arima5$aic)
)
aic.arima
## Model AIC
## 1 ARIMA(1,1,1) 1017.280
## 2 ARIMA(2,1,1) 1016.139
## 3 ARIMA(1,1,2) 1017.415
## 4 ARIMA(2,1,2) 1019.128
## 5 ARIMA(0,1,1) 1019.237
Model terbaik adalah ARIMA (2,1,1) karena memiliki AIC terendah (1016.139 )
arima.y<-arima(data.training.ts, order=c(2,1,1), include.mean = FALSE,method="ML") ###ARIMA (2,1,1)
sisaan <- arima.y$residuals
# Eksplorasi
par(mfrow=c(2,2))
qqnorm(sisaan)
qqline(sisaan, col = "blue", lwd = 2)
plot(c(1:length(sisaan)),sisaan)
acf(sisaan,lag.max = 10)
pacf(sisaan,lag.max = 10)
Normalitas: Normal Q-Q Plot menunjukkan bahwa sisaan mengikuti sebaran normal karena titik-titik berada di sekitar garis. ACF dan PACF tidak menunjukkan adanya lag yang signifikan, menunjukkan bahwa tidak ada autokorelasi pada residual.
Box.test(sisaan, type = "Ljung")
##
## Box-Ljung test
##
## data: sisaan
## X-squared = 0.039794, df = 1, p-value = 0.8419
Berdasarkan hasil uji Ljung-Box di atas dapat disimpulkan bahwa H0 tidak ditolak karena p-value= 0.8419 > 0.05 yang berarti tidak terdapat gejala autokorelasi pada sisaan dari model ARIMA (2,1,1) tidak berkorelasi.
shapiro.test(sisaan)
##
## Shapiro-Wilk normality test
##
## data: sisaan
## W = 0.95298, p-value = 0.05262
Berdasarkan hasil uji Shapiro-Wilk di atas dapat disimpulkan bahwa h0 tidak ditolak karena p-value = 0.05262 > 0.05 sehingga sisaan dari model ARIMA (2,1,1) mengikuti sebaran Normal.
#plot dugaan dengan data asli
dugaan <- fitted(arima.y)
cbind(data.training.ts,dugaan)
## data.training.ts dugaan
## Jan 2020 113720 113606.28
## Feb 2020 111248 113606.08
## Mar 2020 103755 111952.35
## Apr 2020 71973 105628.53
## May 2020 63245 80124.72
## Jun 2020 81201 63442.24
## Jul 2020 91184 75701.66
## Aug 2020 88649 89693.41
## Sep 2020 93786 90027.77
## Oct 2020 95848 92212.42
## Nov 2020 96790 95638.07
## Dec 2020 92974 96675.31
## Jan 2021 90835 94079.48
## Feb 2021 86823 91156.42
## Mar 2021 103847 87769.70
## Apr 2021 101158 98929.16
## May 2021 94907 103059.45
## Jun 2021 100407 96429.02
## Jul 2021 77208 98477.79
## Aug 2021 83027 83910.78
## Sep 2021 97551 79848.33
## Oct 2021 103294 93987.20
## Nov 2021 109700 102725.75
## Dec 2021 117707 108349.22
## Jan 2022 119978 115964.10
## Feb 2022 101012 119909.20
## Mar 2022 116623 106340.59
## Apr 2022 115877 111064.42
## May 2022 111053 117149.80
## Jun 2022 124885 112319.11
## Jul 2022 124704 120782.00
## Aug 2022 138302 125702.33
## Sep 2022 137590 134583.32
## Oct 2022 137664 138717.63
## Nov 2022 141651 137596.60
## Dec 2022 145448 140568.81
## Jan 2023 151496 144686.34
## Feb 2023 139225 150107.96
## Mar 2023 157815 142986.75
## Apr 2023 128882 151904.16
## May 2023 160169 138046.24
## Jun 2023 150057 149653.59
## Jul 2023 165814 154958.69
## Aug 2023 177506 160826.96
## Sep 2023 174467 175398.28
## Oct 2023 185192 176100.25
## Nov 2023 183230 182060.28
## Dec 2023 176216 184500.90
plot.ts(data.training.ts, xlab="Month", ylab="Data")
points(data.training.ts)
par(col="red")
lines(dugaan)
par(col="black")
# forecast
h <- length(data.test.ts)
ramalan_arima <- forecast::forecast(
data.training.ts,
model = arima.y,
h = h
)
fc <- ramalan_arima
fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2024 177995.2 163123.8 192866.6 155251.40 200739.0
## Feb 2024 177028.3 158639.5 195417.1 148905.05 205151.6
## Mar 2024 177413.3 154926.2 199900.3 143022.29 211804.2
## Apr 2024 177242.1 151712.8 202771.5 138198.33 216285.9
## May 2024 177315.1 148904.9 205725.2 133865.53 220764.7
## Jun 2024 177283.5 146323.6 208243.4 129934.44 224632.5
## Jul 2024 177297.1 143956.1 210638.1 126306.47 228287.7
## Aug 2024 177291.2 141738.7 212843.8 122918.34 231664.1
## Sep 2024 177293.8 139655.2 214932.4 119730.48 234857.0
## Oct 2024 177292.7 137679.4 216905.9 116709.45 237875.9
## Nov 2024 177293.1 135798.4 218787.9 113832.44 240753.8
## Dec 2024 177292.9 133998.7 220587.2 111080.13 243505.7
## Jan 2025 177293.0 132271.0 222315.0 108437.84 246148.2
## Feb 2025 177293.0 130607.2 223978.8 105893.23 248692.7
## Mar 2025 177293.0 129000.7 225585.3 103436.25 251149.8
## Apr 2025 177293.0 127445.9 227140.1 101058.41 253527.6
## May 2025 177293.0 125938.1 228647.9 98752.53 255833.5
## Jun 2025 177293.0 124473.4 230112.6 96512.45 258073.6
## Jul 2025 177293.0 123048.3 231537.7 94332.82 260253.2
## Aug 2025 177293.0 121659.6 232926.4 92209.02 262377.0
## Sep 2025 177293.0 120304.7 234281.3 90136.95 264449.0
plot(fc)
forecast_mean <- as.numeric(fc$mean)
lo80 <- as.numeric(fc$lower[,"80%"])
hi80 <- as.numeric(fc$upper[,"80%"])
lo95 <- as.numeric(fc$lower[,"95%"])
hi95 <- as.numeric(fc$upper[,"95%"])
gabungan <- data.frame(
Aktual = as.numeric(data.test.ts),
Ramalan = forecast_mean,
Lo80 = lo80,
Hi80 = hi80,
Lo95 = lo95,
Hi95 = hi95
)
gabungan
## Aktual Ramalan Lo80 Hi80 Lo95 Hi95
## 1 189233 177995.2 163123.8 192866.6 155251.40 200739.0
## 2 178354 177028.3 158639.5 195417.1 148905.05 205151.6
## 3 186960 177413.3 154926.2 199900.3 143022.29 211804.2
## 4 179445 177242.1 151712.8 202771.5 138198.33 216285.9
## 5 198102 177315.1 148904.9 205725.2 133865.53 220764.7
## 6 182709 177283.5 146323.6 208243.4 129934.44 224632.5
## 7 211162 177297.1 143956.1 210638.1 126306.47 228287.7
## 8 205278 177291.2 141738.7 212843.8 122918.34 231664.1
## 9 193871 177293.8 139655.2 214932.4 119730.48 234857.0
## 10 213899 177292.7 137679.4 216905.9 116709.45 237875.9
## 11 200299 177293.1 135798.4 218787.9 113832.44 240753.8
## 12 199279 177292.9 133998.7 220587.2 111080.13 243505.7
## 13 200965 177293.0 132271.0 222315.0 108437.84 246148.2
## 14 190526 177293.0 130607.2 223978.8 105893.23 248692.7
## 15 187920 177293.0 129000.7 225585.3 103436.25 251149.8
## 16 186841 177293.0 127445.9 227140.1 101058.41 253527.6
## 17 192122 177293.0 125938.1 228647.9 98752.53 255833.5
## 18 183892 177293.0 124473.4 230112.6 96512.45 258073.6
## 19 210053 177293.0 123048.3 231537.7 94332.82 260253.2
## 20 201133 177293.0 121659.6 232926.4 92209.02 262377.0
## 21 205217 177293.0 120304.7 234281.3 90136.95 264449.0
accuracy(ramalan_arima, data.test.ts) #arima
## ME RMSE MAE MPE MAPE MASE
## Training set 1598.616 11482.70 8802.335 0.3598824 8.287991 0.3235122
## Test set 17789.745 20548.98 17789.745 8.8659687 8.865969 0.6538264
## ACF1 Theil's U
## Training set -0.02791608 NA
## Test set 0.21146120 1.557217
#forecast
(ramalan_arima<- forecast::forecast(data.ts,model=arima.y, h=12))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Oct 2025 203494.2 188622.8 218365.6 180750.4 226238.0
## Nov 2025 204243.3 185854.4 222632.1 176120.0 232366.5
## Dec 2025 203921.2 181434.2 226408.3 169530.3 238312.2
## Jan 2026 204060.3 178530.9 229589.6 165016.5 243104.1
## Feb 2026 204000.3 175590.2 232410.5 160550.8 247449.9
## Mar 2026 204026.2 173066.3 234986.1 156677.1 251375.2
## Apr 2026 204015.0 170674.0 237356.0 153024.4 255005.7
## May 2026 204019.8 168467.3 239572.4 149646.9 258392.7
## Jun 2026 204017.8 166379.2 241656.4 146454.5 261581.0
## Jul 2026 204018.7 164405.4 243631.9 143435.4 264601.9
## Aug 2026 204018.3 162523.5 245513.0 140557.6 267479.0
## Sep 2026 204018.4 160724.2 247312.7 137805.6 270231.2