Data_Tubes <- read_excel("G:/My Drive/Semester 5/Analisis Runtun Waktu/Analisis Runtun Waktu/Tubes ARW/Data Tubes.xlsx")
Data_Tubes$Data <- gsub("\\.", "", Data_Tubes$Data)
Data_Tubes$Data <- as.numeric(Data_Tubes$Data)
print(Data_Tubes)
## # A tibble: 60 × 3
## Tahun Bulan Data
## <dbl> <dbl> <dbl>
## 1 2020 1 120255000
## 2 2020 2 146131000
## 3 2020 3 164074000
## 4 2020 4 64987000
## 5 2020 5 6849000
## 6 2020 6 66456000
## 7 2020 7 152926000
## 8 2020 8 131194000
## 9 2020 9 60919000
## 10 2020 10 99546000
## # ℹ 50 more rows
sum(is.na(Data_Tubes$Data))
## [1] 0
cat("=== Statistik Deskriptif ===\n")
## === Statistik Deskriptif ===
summary(Data_Tubes$Data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2130000 33859606 54850833 66046398 93062500 164074000
ts_data <- ts(Data_Tubes$Data, start = c(2020, 1), frequency = 12)
training <- Data_Tubes[1:48, ]
testing <- Data_Tubes[49:60, ]
ts_training <- ts(training$Data, start = c(2020, 1), frequency = 12)
ts_testing <- ts(testing$Data, start = c(2024, 1), frequency = 12)
df_training <- data.frame(
Tahun = as.Date(zoo::as.yearmon(time(ts_training))),
Nilai = as.numeric(ts_training),
Tipe = "Training"
)
df_testing <- data.frame(
Tahun = as.Date(zoo::as.yearmon(time(ts_testing))),
Nilai = as.numeric(ts_testing),
Tipe = "Testing"
)
df_plot <- bind_rows(df_training, df_testing)
ggplot(df_plot, aes(x = Tahun, y = Nilai, color = Tipe, linetype = Tipe)) +
geom_line(size = 0.5) +
scale_color_manual(values = c("Training" = "blue", "Testing" = "blue")) +
scale_linetype_manual(values = c("Training" = "solid", "Testing" = "dashed")) +
geom_vline(xintercept = as.Date("2024-01-01"), linetype = "dotted", color = "gray40") +
labs(title = "Plot Deret Waktu (Training vs Testing)",
x = "Tahun", y = "Nilai") +
theme_minimal(base_size = 12) +
theme(legend.title = element_blank())
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#boxplot tahunan
training$Periode <- "2020–2023"
ggplot(training, aes(x = Periode, y = Data)) +
geom_boxplot(fill = "lightgreen", color = "darkgreen",
outlier.colour = "red", outlier.size = 2) +
labs(
title = "Boxplot Data Training (2020–2023)",
x = "",
y = "Nilai Denda Pelanggaran"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
axis.title = element_text(face = "bold"),
axis.text.x = element_text(face = "bold")
)

#boxplot per tahun
training$Tahun <- rep(2020:2023, each = 12)
ggplot(training, aes(x = factor(Tahun), y = Data)) +
geom_boxplot(fill = "skyblue", color = "darkblue", outlier.colour = "red", outlier.size = 2) +
labs(title = "Boxplot Data Training per Tahun (2020–2023)",
x = "Tahun",
y = "Nilai Denda Pelanggaran") +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
axis.title = element_text(face = "bold")
)

#boxplot perbulan
training$Index <- 1:nrow(training)
training$Tahun <- 2020 + (training$Index - 1) %/% 12
training$BulanNum <- ((training$Index - 1) %% 12) + 1
training$Bulan <- month.abb[training$BulanNum]
training$Bulan <- factor(training$Bulan, levels = month.abb)
ggplot(training, aes(x = Bulan, y = Data)) +
geom_boxplot(fill = "gold", color = "darkorange",
outlier.colour = "red", outlier.size = 2) +
labs(title = "Boxplot Data Training per Bulan (2020–2023)",
x = "Bulan",
y = "Nilai Denda Pelanggaran") +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
axis.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1)
)

model_des_fit <- holt(ts_training, h = length(ts_testing),
initial = "optimal", seasonal = "none")
nilai_fitted_des <- ts(fitted(model_des_fit),
start = start(ts_training),
frequency = frequency(ts_training))
print("--- Ringkasan Model Double Exponential Smoothing (Holt's Method) ---")
## [1] "--- Ringkasan Model Double Exponential Smoothing (Holt's Method) ---"
summary(model_des_fit)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = ts_training, h = length(ts_testing), initial = "optimal",
## seasonal = "none")
##
## Smoothing parameters:
## alpha = 0.3681
## beta = 1e-04
##
## Initial states:
## l = 122128399.7153
## b = -3780854.6827
##
## sigma: 49224990
##
## AIC AICc BIC
## 1891.985 1893.413 1901.341
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5590486 47129340 35856275 -111.0776 154.1508 0.5740707 0.03568187
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2024 36038588 -27045775 99122951 -60440620 132517796
## Feb 2024 32284569 -34939549 99508686 -70525847 135094984
## Mar 2024 28530550 -42594833 99655933 -80246337 137307436
## Apr 2024 24776530 -50048943 99602003 -89659155 139212216
## May 2024 21022511 -57330383 99375405 -98807899 140852922
## Jun 2024 17268492 -64461514 98998498 -107726766 142263750
## Jul 2024 13514473 -71460260 98489205 -116443166 143472111
## Aug 2024 9760453 -78341249 97862156 -124979473 144500380
## Sep 2024 6006434 -85116605 97129474 -133354229 145367097
## Oct 2024 2252415 -91796510 96301340 -141583004 146087834
## Nov 2024 -1501604 -98389612 95386404 -149679025 146675817
## Dec 2024 -5255623 -104903331 94392084 -157653641 147142394
# --- Plot hasil DES ---
autoplot(ts_training, series = "Data Aktual (2020-2023)") +
autolayer(nilai_fitted_des, series = "Garis DES (Smoothing)") +
labs(
title = "Double Exponential Smoothing (DES) (2020-2023)",
subtitle = paste(
"Alpha Optimal:", round(model_des_fit$model$par["alpha"], 4),
"| Beta Optimal:", round(model_des_fit$model$par["beta"], 4)
),
y = "Jumlah Denda (Rp)",
x = "Waktu"
) +
guides(colour = guide_legend(title = "Deret Waktu")) +
theme(legend.position = "bottom")

cat("=== UJI STASIONERITAS ADF TEST ===\n")
## === UJI STASIONERITAS ADF TEST ===
adf_test_result <- adf.test(ts_training)
print(adf_test_result)
##
## Augmented Dickey-Fuller Test
##
## data: ts_training
## Dickey-Fuller = -2.7682, Lag order = 3, p-value = 0.2673
## alternative hypothesis: stationary
t <- 1:length(ts_training)
lm_trend <- lm(ts_training ~ t)
summary(lm_trend)
##
## Call:
## lm(formula = ts_training ~ t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -72820639 -39235110 -12320695 41268270 90725432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83708653 14122442 5.927 3.72e-07 ***
## t -807803 501766 -1.610 0.114
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48160000 on 46 degrees of freedom
## Multiple R-squared: 0.05334, Adjusted R-squared: 0.03276
## F-statistic: 2.592 on 1 and 46 DF, p-value: 0.1143
plot(ts_training, main="Regresi Tren", ylab="Denda")
lines(ts_training, col="black")
lines(lm_trend$fitted.values, col="red", lwd=2)

t_future <- (length(ts_training)+1):(length(ts_training)+12)
reg_pred <- predict(lm_trend, newdata = list(t = t_future))
acf(ts_data, main = "ACF Original Data")

adf_test <- adf.test(ts_data)
cat("\n=== Uji ADF (Mean Stasioner?) ===\n")
##
## === Uji ADF (Mean Stasioner?) ===
print(adf_test)
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -3.2627, Lag order = 3, p-value = 0.08607
## alternative hypothesis: stationary
cat("\n=== Uji Stasioner Variansi (Manual Check) ===\n")
##
## === Uji Stasioner Variansi (Manual Check) ===
n <- length(ts_data)
half <- floor(n/2)
bagian_awal <- ts_data[1:half]
bagian_akhir <- ts_data[(half+1):n]
var_awal <- var(bagian_awal)
var_akhir <- var(bagian_akhir)
mean_awal <- mean(bagian_awal)
mean_akhir <- mean(bagian_akhir)
cat("Mean Bagian Awal :", round(mean_awal, 3), "\n")
## Mean Bagian Awal : 74138807
cat("Mean Bagian Akhir :", round(mean_akhir, 3), "\n\n")
## Mean Bagian Akhir : 57953989
cat("Varians Bagian Awal :", round(var_awal, 3), "\n")
## Varians Bagian Awal : 2.866654e+15
cat("Varians Bagian Akhir :", round(var_akhir, 3), "\n\n")
## Varians Bagian Akhir : 1.2262e+15
if(abs(var_awal - var_akhir) / var_awal < 0.20){
cat("Kesimpulan: Variansi relatif stabil (stasioner variansi)\n")
} else {
cat("Kesimpulan: Variansi berubah signifikan → tidak stasioner variansi\n")
}
## Kesimpulan: Variansi berubah signifikan → tidak stasioner variansi
cat("=== LANGKAH 1: Box-Cox Sebelum Transformasi ===\n")
## === LANGKAH 1: Box-Cox Sebelum Transformasi ===
lambda_opt <- BoxCox.lambda(ts_data)
cat("Lambda optimal =", lambda_opt, "\n\n")
## Lambda optimal = 0.5979329
boxcox(ts_data ~ 1,
lambda = seq(-2, 2, 0.1),
main = "Box–Cox Plot (SEBELUM Transformasi)")
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'main' will be disregarded

cat("\n=== LANGKAH 2: Transformasi Box–Cox ===\n")
##
## === LANGKAH 2: Transformasi Box–Cox ===
ts_bc <- BoxCox(ts_data, lambda_opt)
plot(ts_data, type="l", col="darkred", lwd=2,
main="Data Asli (Sebelum Transformasi Box–Cox)",
ylab="Nilai", xlab="Time")

cat("\n=== LANGKAH 3: ACF & PACF Setelah Transformasi ===\n")
##
## === LANGKAH 3: ACF & PACF Setelah Transformasi ===
par(mfrow=c(1,2))
acf(ts_bc, main="ACF - Setelah Transformasi Box–Cox")
pacf(ts_bc, main="PACF - Setelah Transformasi Box–Cox")

par(mfrow=c(1,1))
cat("\n=== LANGKAH 4: Uji ADF Setelah Transformasi ===\n")
##
## === LANGKAH 4: Uji ADF Setelah Transformasi ===
adf_bc <- adf.test(ts_bc)
print(adf_bc)
##
## Augmented Dickey-Fuller Test
##
## data: ts_bc
## Dickey-Fuller = -3.2332, Lag order = 3, p-value = 0.09073
## alternative hypothesis: stationary
cat("\nInterpretasi:\n")
##
## Interpretasi:
if(adf_bc$p.value < 0.05){
cat("→ p-value < 0.05 → Data SUDAH stasioner mean setelah transformasi.\n")
} else {
cat("→ p-value ≥ 0.05 → Data BELUM stasioner mean → perlu differencing.\n")
}
## → p-value ≥ 0.05 → Data BELUM stasioner mean → perlu differencing.
ts_bc_diff <- diff(ts_bc)
cat("\n=== LANGKAH 6: Data Setelah Differencing (Transformasi + Diff 1) ===\n")
##
## === LANGKAH 6: Data Setelah Differencing (Transformasi + Diff 1) ===
print(head(ts_bc_diff))
## Feb Mar Apr May Jun Jul
## 2020 14018.35 9138.05 -58076.64 -58059.01 59115.31 51393.24
cat("\n=== LANGKAH 7: ACF & PACF Setelah Differencing ===\n")
##
## === LANGKAH 7: ACF & PACF Setelah Differencing ===
par(mfrow=c(1,2))
acf(ts_bc_diff, main="ACF - Setelah Differencing")
pacf(ts_bc_diff, main="PACF - Setelah Differencing")

par(mfrow=c(1,1))
cat("\n=== LANGKAH 8: Uji ADF Setelah Differencing ===\n")
##
## === LANGKAH 8: Uji ADF Setelah Differencing ===
adf_diff <- adf.test(ts_bc_diff)
## Warning in adf.test(ts_bc_diff): p-value smaller than printed p-value
print(adf_diff)
##
## Augmented Dickey-Fuller Test
##
## data: ts_bc_diff
## Dickey-Fuller = -5.9682, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
cat("\nInterpretasi:\n")
##
## Interpretasi:
if(adf_diff$p.value < 0.05){
cat("→ p-value < 0.05 → Data SUDAH stasioner mean setelah differencing.\n")
} else {
cat("→ Data MASIH belum stasioner, mungkin perlu differencing tambahan.\n")
}
## → p-value < 0.05 → Data SUDAH stasioner mean setelah differencing.
cat("\n=== LANGKAH 9: Uji ARCH / Heteroskedastisitas ===\n")
##
## === LANGKAH 9: Uji ARCH / Heteroskedastisitas ===
arch_test <- ArchTest(ts_bc_diff)
print(arch_test)
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: ts_bc_diff
## Chi-squared = 5.585, df = 12, p-value = 0.9355
cat("\nInterpretasi ARCH:\n")
##
## Interpretasi ARCH:
if(arch_test$p.value > 0.05){
cat("→ p-value > 0.05 → TIDAK ada heteroskedastisitas → variansi sudah stasioner.\n")
} else {
cat("→ p-value ≤ 0.05 → Ada heteroskedastisitas → variansi BELUM stasioner.\n")
}
## → p-value > 0.05 → TIDAK ada heteroskedastisitas → variansi sudah stasioner.
cat("=== ARIMA MANUAL TANPA auto.arima ===\n")
## === ARIMA MANUAL TANPA auto.arima ===
m_arima_011 <- arima(ts_bc, order=c(0,1,1))
m_arima_111 <- arima(ts_bc, order=c(1,1,1))
m_arima_012 <- arima(ts_bc, order=c(0,1,2))
m_arima_211 <- arima(ts_bc, order=c(2,1,1))
m_arima_110 <- arima(ts_bc, order=c(1,1,0))
cat("\n=== AIC SETIAP MODEL ===\n")
##
## === AIC SETIAP MODEL ===
AIC(m_arima_011, m_arima_111, m_arima_012, m_arima_211, m_arima_110)
## df AIC
## m_arima_011 2 1398.546
## m_arima_111 3 1396.919
## m_arima_012 3 1399.384
## m_arima_211 4 1398.472
## m_arima_110 2 1404.502
model_best <- m_arima_110
cat("\n=== MODEL TERPILIH ===\n")
##
## === MODEL TERPILIH ===
print(model_best)
##
## Call:
## arima(x = ts_bc, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.3597
## s.e. 0.1201
##
## sigma^2 estimated as 1.19e+09: log likelihood = -700.25, aic = 1404.5
par(mfrow=c(2,2))
plot(model_best$residuals, main="Residual Plot")
acf(model_best$residuals, main="ACF Residual")
pacf(model_best$residuals, main="PACF Residual")
hist(model_best$residuals, main="Histogram Residual", xlab="Residual")

par(mfrow=c(1,1))
cat("\n=== UJI LJUNG-BOX ===\n")
##
## === UJI LJUNG-BOX ===
print(Box.test(model_best$residuals, lag=12, type="Ljung"))
##
## Box-Ljung test
##
## data: model_best$residuals
## X-squared = 12.934, df = 12, p-value = 0.3739
cat("\n=== UJI ARCH (HETEROSKEDASTISITAS) ===\n")
##
## === UJI ARCH (HETEROSKEDASTISITAS) ===
print(ArchTest(model_best$residuals, lags=12))
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: model_best$residuals
## Chi-squared = 6.2519, df = 12, p-value = 0.9029
forecast_result <- forecast(model_best, h=12)
cat("\n=== PERAMALAN (12 PERIODE) ===\n")
##
## === PERAMALAN (12 PERIODE) ===
print(forecast_result)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2025 74131.94 29925.949 118337.9 6524.708 141739.2
## Feb 2025 73118.64 20628.178 125609.1 -7158.593 153395.9
## Mar 2025 73483.16 10930.056 136036.3 -22183.560 169149.9
## Apr 2025 73352.03 3104.427 143599.6 -34082.408 180786.5
## May 2025 73399.20 -4089.120 150887.5 -45108.961 191907.4
## Jun 2025 73382.23 -10622.554 157387.0 -55092.002 201856.5
## Jul 2025 73388.34 -16697.141 163473.8 -64385.514 211162.2
## Aug 2025 73386.14 -22383.066 169155.4 -73080.223 219852.5
## Sep 2025 73386.93 -27751.068 174524.9 -81290.292 228064.2
## Oct 2025 73386.65 -32847.805 179621.1 -89084.927 235858.2
## Nov 2025 73386.75 -37711.076 184484.6 -96522.713 243296.2
## Dec 2025 73386.71 -42370.177 189143.6 -103648.173 250421.6
plot(forecast_result, main="Forecast ARIMA Manual")

cat("\n=== UJI KOLMOGOROV–SMIRNOV (Normalitas) ===\n")
##
## === UJI KOLMOGOROV–SMIRNOV (Normalitas) ===
resid <- model_best$residuals
ks <- ks.test(resid, "pnorm", mean(resid), sd(resid))
print(ks)
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: resid
## D = 0.061023, p-value = 0.9688
## alternative hypothesis: two-sided
cat("\nInterpretasi KS Test:\n")
##
## Interpretasi KS Test:
if (ks$p.value > 0.05) {
cat("→ p-value > 0.05 → residual BERDISTRIBUSI NORMAL (tidak menolak H0).\n")
} else {
cat("→ p-value ≤ 0.05 → residual TIDAK NORMAL (menolak H0).\n")
}
## → p-value > 0.05 → residual BERDISTRIBUSI NORMAL (tidak menolak H0).
rmse <- function(a,b) sqrt(mean((a-b)^2))
mape <- function(a,b) mean(abs((a-b)/a)) * 100
smape <- function(a,b) mean(2 * abs(a-b) / (abs(a)+abs(b))) * 100
actual <- ts_bc[2:length(ts_bc)]
pred <- ts_bc[2:length(ts_bc)] - model_best$residuals
## Warning in `-.default`(ts_bc[2:length(ts_bc)], model_best$residuals): longer
## object length is not a multiple of shorter object length
n <- min(length(actual), length(pred))
actual <- actual[1:n]
pred <- pred[1:n]
eval <- data.frame(
Model = c("Model_Best"),
RMSE = c(rmse(actual, pred)),
MAPE = c(mape(actual, pred)),
SMAPE = c(smape(actual, pred))
)
print(eval)
## Model RMSE MAPE SMAPE
## 1 Model_Best 34481.9 47.42049 44.56981