library(readr)
library(dplyr)
library(forecast)
library(tseries)
library(MASS)
library(ggplot2)
data_ppn <- read_csv(
  "C:/Users/rizki/OneDrive/Documents/MAGANG (smt 5)/SEMESTER 5/Laporan Akhir/Mini Projek/sby.csv"
)

str(data_ppn)
## spc_tbl_ [60 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Tahun: num [1:60] 2019 2019 2019 2019 2019 ...
##  $ Bulan: chr [1:60] "Januari" "Februari" "Maret" "April" ...
##  $ PPN  : num [1:60] 3.9e+10 3.4e+10 3.4e+10 2.9e+10 4.2e+10 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Tahun = col_double(),
##   ..   Bulan = col_character(),
##   ..   PPN = col_number()
##   .. )
##  - attr(*, "problems")=<externalptr>
head(data_ppn)
## # A tibble: 6 × 3
##   Tahun Bulan            PPN
##   <dbl> <chr>          <dbl>
## 1  2019 Januari  39000000000
## 2  2019 Februari 34000000000
## 3  2019 Maret    34000000000
## 4  2019 April    29000000000
## 5  2019 Mei      42000000000
## 6  2019 Juni     29000000000
ppn_ts <- ts(
  as.numeric(data_ppn$PPN),
  start = c(2019, 1),
  frequency = 12
)
ts.plot(
  ppn_ts,
  col = "dodgerblue2",
  ylab = "Jumlah PPN",
  xlab = "Waktu (Bulan)"
)

title(main = "Penerimaan PPN Bulanan 2019–2023")

points(
  ppn_ts,
  pch = 20,
  col = "red2"
)

adf.test(ppn_ts)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ppn_ts
## Dickey-Fuller = -3.3417, Lag order = 3, p-value = 0.07359
## alternative hypothesis: stationary
kpss.test(ppn_ts)
## 
##  KPSS Test for Level Stationarity
## 
## data:  ppn_ts
## KPSS Level = 0.45202, Truncation lag parameter = 3, p-value = 0.05473
ppn_diff1 <- diff(ppn_ts)

plot(
  ppn_diff1,
  main = "PPN Setelah Differencing Orde 1",
  ylab = "Perubahan PPN",
  xlab = "Waktu"
)

adf.test(ppn_diff1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ppn_diff1
## Dickey-Fuller = -4.4001, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(ppn_diff1)
## 
##  KPSS Test for Level Stationarity
## 
## data:  ppn_diff1
## KPSS Level = 0.041051, Truncation lag parameter = 3, p-value = 0.1
ndiffs(ppn_ts)
## [1] 1
par(mfrow = c(1, 2))

acf(
  ppn_diff1,
  main = "ACF",
  lag.max = 24
)

pacf(
  ppn_diff1,
  main = "PACF",
  lag.max = 24
)

par(mfrow = c(1, 1))
model_110 <- Arima(ppn_ts, order = c(1, 1, 0))
model_011 <- Arima(ppn_ts, order = c(0, 1, 1))
model_111 <- Arima(ppn_ts, order = c(1, 1, 1))
model_112 <- Arima(ppn_ts, order = c(1, 1, 2))

summary(model_111)
## Series: ppn_ts 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.3909  -0.8680
## s.e.  0.1720   0.0995
## 
## sigma^2 = 6.58e+19:  log likelihood = -1429.25
## AIC=2864.49   AICc=2864.93   BIC=2870.72
## 
## Training set error measures:
##                      ME       RMSE        MAE       MPE     MAPE      MASE
## Training set -374860905 7906164662 5943434510 -5.371858 16.81781 0.6031392
##                     ACF1
## Training set -0.04343089
aic_values <- data.frame(
  Model = c(
    "ARIMA(1,1,0)",
    "ARIMA(0,1,1)",
    "ARIMA(1,1,1)",
    "ARIMA(1,1,2)"
  ),
  AIC = c(
    AIC(model_110),
    AIC(model_011),
    AIC(model_111),
    AIC(model_112)
  ),
  BIC = c(
    BIC(model_110),
    BIC(model_011),
    BIC(model_111),
    BIC(model_112)
  )
)

aic_values
##          Model      AIC      BIC
## 1 ARIMA(1,1,0) 2868.445 2872.600
## 2 ARIMA(0,1,1) 2865.148 2869.303
## 3 ARIMA(1,1,1) 2864.490 2870.723
## 4 ARIMA(1,1,2) 2865.739 2874.049
checkresiduals(model_111)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)
## Q* = 7.0636, df = 10, p-value = 0.7194
## 
## Model df: 2.   Total lags used: 12
Box.test(
  residuals(model_111),
  lag = 24,
  type = "Ljung-Box"
)
## 
##  Box-Ljung test
## 
## data:  residuals(model_111)
## X-squared = 18.46, df = 24, p-value = 0.7802
forecast_2024 <- forecast(
  model_111,
  h = 12,
  level = c(80, 95)
)

forecast_2024
##          Point Forecast       Lo 80       Hi 80       Lo 95       Hi 95
## Jan 2024    33884391941 23489017444 44279766439 17986038595 49782745288
## Feb 2024    33057487043 21326762204 44788211882 15116891584 50998082502
## Mar 2024    32734283646 20493392768 44975174524 14013456561 51455110731
## Apr 2024    32607956621 20064291393 45151621849 13424076126 51791837115
## May 2024    32558580539 19779190291 45337970787 13014189737 52102971341
## Jun 2024    32539281443 19549675467 45528887419 12673393429 52405169457
## Jul 2024    32531738213 19343169225 45720307202 12361562529 52701913897
## Aug 2024    32528789873 19147219118 45910360627 12063443459 52994136286
## Sep 2024    32527637486 18956960906 46098314067 11773078669 53282196304
## Oct 2024    32527187066 18770446944 46283927188 11488068618 53566305513
## Nov 2024    32527011015 18586861314 46467160716 11207391822 53846630207
## Dec 2024    32526942203 18405830764 46648053643 10930565914 54123318493
forecast_2024_table <- data.frame(
  Bulan = time(forecast_2024$mean),
  Forecast_PPN = as.numeric(forecast_2024$mean),
  Lower_80 = as.numeric(forecast_2024$lower[, 1]),
  Upper_80 = as.numeric(forecast_2024$upper[, 1]),
  Lower_95 = as.numeric(forecast_2024$lower[, 2]),
  Upper_95 = as.numeric(forecast_2024$upper[, 2])
)

forecast_2024_table
##       Bulan Forecast_PPN    Lower_80    Upper_80    Lower_95    Upper_95
## 1  2024.000  33884391941 23489017444 44279766439 17986038595 49782745288
## 2  2024.083  33057487043 21326762204 44788211882 15116891584 50998082502
## 3  2024.167  32734283646 20493392768 44975174524 14013456561 51455110731
## 4  2024.250  32607956621 20064291393 45151621849 13424076126 51791837115
## 5  2024.333  32558580539 19779190291 45337970787 13014189737 52102971341
## 6  2024.417  32539281443 19549675467 45528887419 12673393429 52405169457
## 7  2024.500  32531738213 19343169225 45720307202 12361562529 52701913897
## 8  2024.583  32528789873 19147219118 45910360627 12063443459 52994136286
## 9  2024.667  32527637486 18956960906 46098314067 11773078669 53282196304
## 10 2024.750  32527187066 18770446944 46283927188 11488068618 53566305513
## 11 2024.833  32527011015 18586861314 46467160716 11207391822 53846630207
## 12 2024.917  32526942203 18405830764 46648053643 10930565914 54123318493
plot(
  forecast_2024,
  main = "Peramalan Penerimaan PPN Tahun 2024",
  xlab = "Waktu",
  ylab = "Jumlah PPN"
)

ts.plot(
  ppn_ts,
  forecast_2024$mean,
  col = c("black", "red"),
  lty = c(1, 2),
  xlab = "Waktu",
  ylab = "Jumlah PPN"
)

legend(
  "topleft",
  legend = c("Aktual 2019–2023", "Forecast 2024"),
  col = c("black", "red"),
  lty = c(1, 2),
  bty = "n"
)

train_ppn <- window(ppn_ts, end = c(2022, 12))
test_ppn  <- window(ppn_ts, start = c(2023, 1))

model_arima_train <- Arima(train_ppn, order = c(1, 1, 1))
forecast_arima_test <- forecast(model_arima_train, h = 12)
accuracy_arima <- accuracy(forecast_arima_test, test_ppn)
accuracy_arima
##                       ME        RMSE         MAE        MPE     MAPE      MASE
## Training set   167929680  7629266927  5694957774  -2.564484 14.40011 0.5601598
## Test set     -8071888610 12642044469 10394208011 -40.443380 44.74906 1.0223811
##                     ACF1 Theil's U
## Training set -0.02291278        NA
## Test set      0.45566803  2.357983