Data_Tubes <- read_excel("G:/My Drive/Semester 5/Analisis Runtun Waktu/Analisis Runtun Waktu/Tubes ARW/Data Tubes.xlsx")
Data_Tubes$Data <- gsub("\\.", "", Data_Tubes$Data)
Data_Tubes$Data <- as.numeric(Data_Tubes$Data)
print(Data_Tubes)
## # A tibble: 60 × 3
##    Tahun Bulan      Data
##    <dbl> <dbl>     <dbl>
##  1  2020     1 120255000
##  2  2020     2 146131000
##  3  2020     3 164074000
##  4  2020     4  64987000
##  5  2020     5   6849000
##  6  2020     6  66456000
##  7  2020     7 152926000
##  8  2020     8 131194000
##  9  2020     9  60919000
## 10  2020    10  99546000
## # ℹ 50 more rows
sum(is.na(Data_Tubes$Data)) 
## [1] 0
cat("=== Statistik Deskriptif ===\n")
## === Statistik Deskriptif ===
summary(Data_Tubes$Data)  
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##   2130000  33859606  54850833  66046398  93062500 164074000
ts_data <- ts(Data_Tubes$Data, start = c(2020, 1), frequency = 12)

training <- Data_Tubes[1:48, ]    
testing  <- Data_Tubes[49:60, ]   

ts_training <- ts(training$Data, start = c(2020, 1), frequency = 12)
ts_testing  <- ts(testing$Data, start = c(2024, 1), frequency = 12)

df_training <- data.frame(
  Tahun = as.Date(zoo::as.yearmon(time(ts_training))), 
  Nilai = as.numeric(ts_training),
  Tipe = "Training"
)

df_testing <- data.frame(
  Tahun = as.Date(zoo::as.yearmon(time(ts_testing))), 
  Nilai = as.numeric(ts_testing),
  Tipe = "Testing"
)
df_plot <- bind_rows(df_training, df_testing)
ggplot(df_plot, aes(x = Tahun, y = Nilai, color = Tipe, linetype = Tipe)) +
  geom_line(size = 0.5) +
  scale_color_manual(values = c("Training" = "blue", "Testing" = "blue")) +
  scale_linetype_manual(values = c("Training" = "solid", "Testing" = "dashed")) +
  geom_vline(xintercept = as.Date("2024-01-01"), linetype = "dotted", color = "gray40") +
  labs(title = "Plot Deret Waktu (Training vs Testing)",
       x = "Tahun", y = "Nilai") +
  theme_minimal(base_size = 12) +
  theme(legend.title = element_blank())
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#boxplot tahunan
training$Periode <- "2020–2023"

ggplot(training, aes(x = Periode, y = Data)) +
  geom_boxplot(fill = "lightgreen", color = "darkgreen",
               outlier.colour = "red", outlier.size = 2) +
  labs(
    title = "Boxplot Data Training (2020–2023)",
    x = "",
    y = "Nilai Denda Pelanggaran"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(face = "bold")
  )

#boxplot per tahun
training$Tahun <- rep(2020:2023, each = 12)

ggplot(training, aes(x = factor(Tahun), y = Data)) +
  geom_boxplot(fill = "skyblue", color = "darkblue", outlier.colour = "red", outlier.size = 2) +
  labs(title = "Boxplot Data Training per Tahun (2020–2023)",
       x = "Tahun",
       y = "Nilai Denda Pelanggaran") +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
    axis.title = element_text(face = "bold")
  )

#boxplot perbulan
training$Index <- 1:nrow(training)
training$Tahun <- 2020 + (training$Index - 1) %/% 12
training$BulanNum <- ((training$Index - 1) %% 12) + 1
training$Bulan <- month.abb[training$BulanNum]
training$Bulan <- factor(training$Bulan, levels = month.abb)
ggplot(training, aes(x = Bulan, y = Data)) +
  geom_boxplot(fill = "gold", color = "darkorange",
               outlier.colour = "red", outlier.size = 2) +
  labs(title = "Boxplot Data Training per Bulan (2020–2023)",
       x = "Bulan",
       y = "Nilai Denda Pelanggaran") +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

model_des_fit <- holt(ts_training, h = length(ts_testing),
                      initial = "optimal", seasonal = "none")

nilai_fitted_des <- ts(fitted(model_des_fit),
                       start = start(ts_training),
                       frequency = frequency(ts_training))

print("--- Ringkasan Model Double Exponential Smoothing (Holt's Method) ---")
## [1] "--- Ringkasan Model Double Exponential Smoothing (Holt's Method) ---"
summary(model_des_fit)
## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
## holt(y = ts_training, h = length(ts_testing), initial = "optimal", 
##     seasonal = "none")
## 
##   Smoothing parameters:
##     alpha = 0.3681 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 122128399.7153 
##     b = -3780854.6827 
## 
##   sigma:  49224990
## 
##      AIC     AICc      BIC 
## 1891.985 1893.413 1901.341 
## 
## Error measures:
##                   ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 5590486 47129340 35856275 -111.0776 154.1508 0.5740707 0.03568187
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80      Lo 95     Hi 95
## Jan 2024       36038588  -27045775 99122951  -60440620 132517796
## Feb 2024       32284569  -34939549 99508686  -70525847 135094984
## Mar 2024       28530550  -42594833 99655933  -80246337 137307436
## Apr 2024       24776530  -50048943 99602003  -89659155 139212216
## May 2024       21022511  -57330383 99375405  -98807899 140852922
## Jun 2024       17268492  -64461514 98998498 -107726766 142263750
## Jul 2024       13514473  -71460260 98489205 -116443166 143472111
## Aug 2024        9760453  -78341249 97862156 -124979473 144500380
## Sep 2024        6006434  -85116605 97129474 -133354229 145367097
## Oct 2024        2252415  -91796510 96301340 -141583004 146087834
## Nov 2024       -1501604  -98389612 95386404 -149679025 146675817
## Dec 2024       -5255623 -104903331 94392084 -157653641 147142394
# --- Plot hasil DES ---
autoplot(ts_training, series = "Data Aktual (2020-2023)") +
  autolayer(nilai_fitted_des, series = "Garis DES (Smoothing)") +
  labs(
    title = "Double Exponential Smoothing (DES) (2020-2023)",
    subtitle = paste(
      "Alpha Optimal:", round(model_des_fit$model$par["alpha"], 4),
      "| Beta Optimal:", round(model_des_fit$model$par["beta"], 4)
    ),
    y = "Jumlah Denda (Rp)",
    x = "Waktu"
  ) +
  guides(colour = guide_legend(title = "Deret Waktu")) +
  theme(legend.position = "bottom")

cat("=== UJI STASIONERITAS ADF TEST ===\n")
## === UJI STASIONERITAS ADF TEST ===
adf_test_result <- adf.test(ts_training)
print(adf_test_result) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_training
## Dickey-Fuller = -2.7682, Lag order = 3, p-value = 0.2673
## alternative hypothesis: stationary
t <- 1:length(ts_training)
lm_trend <- lm(ts_training ~ t)
summary(lm_trend)
## 
## Call:
## lm(formula = ts_training ~ t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -72820639 -39235110 -12320695  41268270  90725432 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 83708653   14122442   5.927 3.72e-07 ***
## t            -807803     501766  -1.610    0.114    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48160000 on 46 degrees of freedom
## Multiple R-squared:  0.05334,    Adjusted R-squared:  0.03276 
## F-statistic: 2.592 on 1 and 46 DF,  p-value: 0.1143
plot(ts_training, main="Regresi Tren", ylab="Denda")
lines(ts_training, col="black")
lines(lm_trend$fitted.values, col="red", lwd=2)

t_future <- (length(ts_training)+1):(length(ts_training)+12)
reg_pred <- predict(lm_trend, newdata = list(t = t_future))
acf(ts_data, main = "ACF Original Data")

adf_test <- adf.test(ts_data)
cat("\n=== Uji ADF (Mean Stasioner?) ===\n")
## 
## === Uji ADF (Mean Stasioner?) ===
print(adf_test)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -3.2627, Lag order = 3, p-value = 0.08607
## alternative hypothesis: stationary
cat("\n=== Uji Stasioner Variansi (Manual Check) ===\n")
## 
## === Uji Stasioner Variansi (Manual Check) ===
n <- length(ts_data)
half <- floor(n/2)

bagian_awal  <- ts_data[1:half]
bagian_akhir <- ts_data[(half+1):n]

var_awal  <- var(bagian_awal)
var_akhir <- var(bagian_akhir)

mean_awal  <- mean(bagian_awal)
mean_akhir <- mean(bagian_akhir)

cat("Mean Bagian Awal  :", round(mean_awal, 3), "\n")
## Mean Bagian Awal  : 74138807
cat("Mean Bagian Akhir :", round(mean_akhir, 3), "\n\n")
## Mean Bagian Akhir : 57953989
cat("Varians Bagian Awal  :", round(var_awal, 3), "\n")
## Varians Bagian Awal  : 2.866654e+15
cat("Varians Bagian Akhir :", round(var_akhir, 3), "\n\n")
## Varians Bagian Akhir : 1.2262e+15
if(abs(var_awal - var_akhir) / var_awal < 0.20){
  cat("Kesimpulan: Variansi relatif stabil (stasioner variansi)\n")
} else {
  cat("Kesimpulan: Variansi berubah signifikan → tidak stasioner variansi\n")
}
## Kesimpulan: Variansi berubah signifikan → tidak stasioner variansi
cat("=== LANGKAH 1: Box-Cox Sebelum Transformasi ===\n")
## === LANGKAH 1: Box-Cox Sebelum Transformasi ===
lambda_opt <- BoxCox.lambda(ts_data)
cat("Lambda optimal =", lambda_opt, "\n\n")
## Lambda optimal = 0.5979329
boxcox(ts_data ~ 1,
       lambda = seq(-2, 2, 0.1),
       main = "Box–Cox Plot (SEBELUM Transformasi)")
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'main' will be disregarded

cat("\n=== LANGKAH 2: Transformasi Box–Cox ===\n")
## 
## === LANGKAH 2: Transformasi Box–Cox ===
ts_bc <- BoxCox(ts_data, lambda_opt)

plot(ts_data, type="l", col="darkred", lwd=2,
     main="Data Asli (Sebelum Transformasi Box–Cox)",
     ylab="Nilai", xlab="Time")

cat("\n=== LANGKAH 3: ACF & PACF Setelah Transformasi ===\n")
## 
## === LANGKAH 3: ACF & PACF Setelah Transformasi ===
par(mfrow=c(1,2))
acf(ts_bc, main="ACF - Setelah Transformasi Box–Cox")
pacf(ts_bc, main="PACF - Setelah Transformasi Box–Cox")

par(mfrow=c(1,1))

cat("\n=== LANGKAH 4: Uji ADF Setelah Transformasi ===\n")
## 
## === LANGKAH 4: Uji ADF Setelah Transformasi ===
adf_bc <- adf.test(ts_bc)
print(adf_bc)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_bc
## Dickey-Fuller = -3.2332, Lag order = 3, p-value = 0.09073
## alternative hypothesis: stationary
cat("\nInterpretasi:\n")
## 
## Interpretasi:
if(adf_bc$p.value < 0.05){
  cat("→ p-value < 0.05 → Data SUDAH stasioner mean setelah transformasi.\n")
} else {
  cat("→ p-value ≥ 0.05 → Data BELUM stasioner mean → perlu differencing.\n")
}
## → p-value ≥ 0.05 → Data BELUM stasioner mean → perlu differencing.
ts_bc_diff <- diff(ts_bc)   

cat("\n=== LANGKAH 6: Data Setelah Differencing (Transformasi + Diff 1) ===\n")
## 
## === LANGKAH 6: Data Setelah Differencing (Transformasi + Diff 1) ===
print(head(ts_bc_diff))
##            Feb       Mar       Apr       May       Jun       Jul
## 2020  14018.35   9138.05 -58076.64 -58059.01  59115.31  51393.24
cat("\n=== LANGKAH 7: ACF & PACF Setelah Differencing ===\n")
## 
## === LANGKAH 7: ACF & PACF Setelah Differencing ===
par(mfrow=c(1,2))
acf(ts_bc_diff, main="ACF - Setelah Differencing")
pacf(ts_bc_diff, main="PACF - Setelah Differencing")

par(mfrow=c(1,1))

cat("\n=== LANGKAH 8: Uji ADF Setelah Differencing ===\n")
## 
## === LANGKAH 8: Uji ADF Setelah Differencing ===
adf_diff <- adf.test(ts_bc_diff)
## Warning in adf.test(ts_bc_diff): p-value smaller than printed p-value
print(adf_diff)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_bc_diff
## Dickey-Fuller = -5.9682, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
cat("\nInterpretasi:\n")
## 
## Interpretasi:
if(adf_diff$p.value < 0.05){
  cat("→ p-value < 0.05 → Data SUDAH stasioner mean setelah differencing.\n")
} else {
  cat("→ Data MASIH belum stasioner, mungkin perlu differencing tambahan.\n")
}
## → p-value < 0.05 → Data SUDAH stasioner mean setelah differencing.
cat("\n=== LANGKAH 9: Uji ARCH / Heteroskedastisitas ===\n")
## 
## === LANGKAH 9: Uji ARCH / Heteroskedastisitas ===
arch_test <- ArchTest(ts_bc_diff)

print(arch_test)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  ts_bc_diff
## Chi-squared = 5.585, df = 12, p-value = 0.9355
cat("\nInterpretasi ARCH:\n")
## 
## Interpretasi ARCH:
if(arch_test$p.value > 0.05){
  cat("→ p-value > 0.05 → TIDAK ada heteroskedastisitas → variansi sudah stasioner.\n")
} else {
  cat("→ p-value ≤ 0.05 → Ada heteroskedastisitas → variansi BELUM stasioner.\n")
}
## → p-value > 0.05 → TIDAK ada heteroskedastisitas → variansi sudah stasioner.
cat("=== ARIMA MANUAL TANPA auto.arima ===\n")
## === ARIMA MANUAL TANPA auto.arima ===
m_arima_011 <- arima(ts_bc, order=c(0,1,1))
m_arima_111 <- arima(ts_bc, order=c(1,1,1))
m_arima_012 <- arima(ts_bc, order=c(0,1,2))
m_arima_211 <- arima(ts_bc, order=c(2,1,1))
m_arima_110 <- arima(ts_bc, order=c(1,1,0))

cat("\n=== AIC SETIAP MODEL ===\n")
## 
## === AIC SETIAP MODEL ===
AIC(m_arima_011, m_arima_111, m_arima_012, m_arima_211, m_arima_110)
##             df      AIC
## m_arima_011  2 1398.546
## m_arima_111  3 1396.919
## m_arima_012  3 1399.384
## m_arima_211  4 1398.472
## m_arima_110  2 1404.502
model_best <- m_arima_110   

cat("\n=== MODEL TERPILIH ===\n")
## 
## === MODEL TERPILIH ===
print(model_best)
## 
## Call:
## arima(x = ts_bc, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.3597
## s.e.   0.1201
## 
## sigma^2 estimated as 1.19e+09:  log likelihood = -700.25,  aic = 1404.5
par(mfrow=c(2,2))
plot(model_best$residuals, main="Residual Plot")
acf(model_best$residuals, main="ACF Residual")
pacf(model_best$residuals, main="PACF Residual")
hist(model_best$residuals, main="Histogram Residual", xlab="Residual")

par(mfrow=c(1,1))

cat("\n=== UJI LJUNG-BOX ===\n")
## 
## === UJI LJUNG-BOX ===
print(Box.test(model_best$residuals, lag=12, type="Ljung"))
## 
##  Box-Ljung test
## 
## data:  model_best$residuals
## X-squared = 12.934, df = 12, p-value = 0.3739
cat("\n=== UJI ARCH (HETEROSKEDASTISITAS) ===\n")
## 
## === UJI ARCH (HETEROSKEDASTISITAS) ===
print(ArchTest(model_best$residuals, lags=12))
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  model_best$residuals
## Chi-squared = 6.2519, df = 12, p-value = 0.9029
forecast_result <- forecast(model_best, h=12)

cat("\n=== PERAMALAN (12 PERIODE) ===\n")
## 
## === PERAMALAN (12 PERIODE) ===
print(forecast_result)
##          Point Forecast      Lo 80    Hi 80       Lo 95    Hi 95
## Jan 2025       74131.94  29925.949 118337.9    6524.708 141739.2
## Feb 2025       73118.64  20628.178 125609.1   -7158.593 153395.9
## Mar 2025       73483.16  10930.056 136036.3  -22183.560 169149.9
## Apr 2025       73352.03   3104.427 143599.6  -34082.408 180786.5
## May 2025       73399.20  -4089.120 150887.5  -45108.961 191907.4
## Jun 2025       73382.23 -10622.554 157387.0  -55092.002 201856.5
## Jul 2025       73388.34 -16697.141 163473.8  -64385.514 211162.2
## Aug 2025       73386.14 -22383.066 169155.4  -73080.223 219852.5
## Sep 2025       73386.93 -27751.068 174524.9  -81290.292 228064.2
## Oct 2025       73386.65 -32847.805 179621.1  -89084.927 235858.2
## Nov 2025       73386.75 -37711.076 184484.6  -96522.713 243296.2
## Dec 2025       73386.71 -42370.177 189143.6 -103648.173 250421.6
plot(forecast_result, main="Forecast ARIMA Manual")

cat("\n=== UJI KOLMOGOROV–SMIRNOV (Normalitas) ===\n")
## 
## === UJI KOLMOGOROV–SMIRNOV (Normalitas) ===
resid <- model_best$residuals
ks <- ks.test(resid, "pnorm", mean(resid), sd(resid))
print(ks)
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  resid
## D = 0.061023, p-value = 0.9688
## alternative hypothesis: two-sided
cat("\nInterpretasi KS Test:\n")
## 
## Interpretasi KS Test:
if (ks$p.value > 0.05) {
  cat("→ p-value > 0.05 → residual BERDISTRIBUSI NORMAL (tidak menolak H0).\n")
} else {
  cat("→ p-value ≤ 0.05 → residual TIDAK NORMAL (menolak H0).\n")
}
## → p-value > 0.05 → residual BERDISTRIBUSI NORMAL (tidak menolak H0).
rmse  <- function(a,b) sqrt(mean((a-b)^2))
mape  <- function(a,b) mean(abs((a-b)/a)) * 100
smape <- function(a,b) mean(2 * abs(a-b) / (abs(a)+abs(b))) * 100

actual <- ts_bc[2:length(ts_bc)]
pred   <- ts_bc[2:length(ts_bc)] - model_best$residuals
## Warning in `-.default`(ts_bc[2:length(ts_bc)], model_best$residuals): longer
## object length is not a multiple of shorter object length
n <- min(length(actual), length(pred))
actual <- actual[1:n]
pred   <- pred[1:n]

eval <- data.frame(
  Model = c("Model_Best"),
  RMSE  = c(rmse(actual, pred)),
  MAPE  = c(mape(actual, pred)),
  SMAPE = c(smape(actual, pred))
)

print(eval)
##        Model    RMSE     MAPE    SMAPE
## 1 Model_Best 34481.9 47.42049 44.56981