file_path <- "C:/Users/USER/Downloads/data denda arw.xlsx"
raw_data <- read_excel(file_path, skip = 1, col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
names(raw_data)[1:6] <- c("Bulan","2020","2021","2022","2023","2024")
data_long <- raw_data %>%
pivot_longer(cols = c("2020","2021","2022","2023","2024"),
names_to = "Tahun", values_to = "Denda") %>%
filter(!is.na(Denda))
data_long <- data_long %>%
mutate(
Tahun = as.integer(Tahun),
Denda = as.numeric(Denda),
Bulan = as.character(Bulan)
)
bulan_order <- c("Januari","Februari","Maret","April","Mei","Juni",
"Juli","Agustus","September","Oktober","November","Desember")
data_long <- data_long %>%
filter(Bulan %in% bulan_order) %>%
mutate(Bulan = factor(Bulan, levels = bulan_order)) %>%
arrange(Tahun, Bulan) %>%
mutate(Date = as.yearmon(paste(Tahun, Bulan), "%Y %B")) %>%
mutate(Date = as.Date(Date))
ts_all <- ts(data_long$Denda, start = c(2020, 1), frequency = 12)
ts_training <- window(ts_all, end = c(2023,12))
ts_testing <- window(ts_all, start = c(2024,1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
#4. BOXPLOT
df_training <- data_long %>% filter(Tahun <= 2023)
df_training$MonthAbb <- factor(month.abb[as.integer(format(df_training$Date,"%m"))], levels=month.abb)
ggplot(df_training, aes(factor(Tahun), Denda)) +
geom_boxplot(fill="lightblue") +
labs(title="Boxplot Data per Tahun")
ggplot(df_training, aes(MonthAbb, Denda)) +
geom_boxplot(fill="gold") +
labs(title="Boxplot Data per Bulan")
#5. IDENTIFIKASI: ACF & PACF
par(mfrow=c(1,2))
acf(ts_training, main="ACF Training")
pacf(ts_training, main="PACF Training")
par(mfrow=c(1,1))
#6. UJI STASIONERITAS
cat("\n--- ADF TEST ---\n")
##
## --- ADF TEST ---
print(adf.test(ts_training))
##
## Augmented Dickey-Fuller Test
##
## data: ts_training
## Dickey-Fuller = -2.7682, Lag order = 3, p-value = 0.2673
## alternative hypothesis: stationary
cat("\n--- KPSS TEST ---\n")
##
## --- KPSS TEST ---
print(kpss.test(ts_training))
## Warning in kpss.test(ts_training): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: ts_training
## KPSS Level = 0.18791, Truncation lag parameter = 3, p-value = 0.1
#7. Box Cox Transformation
y <- ts_training
min_val <- min(y)
shift <- 0
if(min_val <= 0){
shift <- abs(min_val) + 1
y_shift <- y + shift
cat("Data <= 0 → dilakukan shifting: ", shift, "\n")
} else {
y_shift <- y
}
fit_dummy <- lm(y_shift ~ 1)
bc <- boxcox(fit_dummy, lambda = seq(-2,2,0.1),
main="Box–Cox Transformation (95% CI)")
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
## extra argument 'main' will be disregarded
lambda_opt <- bc$x[which.max(bc$y)]
ci <- bc$x[bc$y > max(bc$y) - qchisq(0.95,1)/2]
cat("Lambda optimal =", lambda_opt, "\n")
## Lambda optimal = 0.3434343
cat("CI 95% =", ci[1], "sampai", ci[length(ci)], "\n")
## CI 95% = 0.1010101 sampai 0.6262626
y_bc <- BoxCox(y_shift, lambda_opt)
par(mfrow=c(2,1))
plot(y, main="Data Asli", ylab="Denda")
plot(y_bc, main=paste("Setelah Box–Cox (λ =", round(lambda_opt,3), ")"), ylab="Denda BC")
par(mfrow=c(1,1))
#8. DOUBLE EXPONENTIAL SMOOTHING (HOLT) — MODEL UTAMA
model_holt <- holt(ts_training,
initial="optimal",
h=length(ts_testing),
exponential=FALSE)
summary(model_holt)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = ts_training, h = length(ts_testing), initial = "optimal",
## exponential = FALSE)
##
## Smoothing parameters:
## alpha = 0.3681
## beta = 1e-04
##
## Initial states:
## l = 122128399.7153
## b = -3780854.6827
##
## sigma: 49224990
##
## AIC AICc BIC
## 1891.985 1893.413 1901.341
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5590486 47129340 35856275 -111.0776 154.1508 0.5740707 0.03568187
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2024 36038588 -27045775 99122951 -60440620 132517796
## Feb 2024 32284569 -34939549 99508686 -70525847 135094984
## Mar 2024 28530550 -42594833 99655933 -80246337 137307436
## Apr 2024 24776530 -50048943 99602003 -89659155 139212216
## May 2024 21022511 -57330383 99375405 -98807899 140852922
## Jun 2024 17268492 -64461514 98998498 -107726766 142263750
## Jul 2024 13514473 -71460260 98489205 -116443166 143472111
## Aug 2024 9760453 -78341249 97862156 -124979473 144500380
## Sep 2024 6006434 -85116605 97129474 -133354229 145367097
## Oct 2024 2252415 -91796510 96301340 -141583004 146087834
## Nov 2024 -1501604 -98389612 95386404 -149679025 146675817
## Dec 2024 -5255623 -104903331 94392084 -157653641 147142394
# Fitted
autoplot(ts_training, series="Actual") +
autolayer(model_holt$fitted, series="Fitted Holt") +
labs(title="Double Exponential Smoothing (Holt)",
y="Nilai Denda")
# Forecast
autoplot(model_holt) +
autolayer(ts_testing, series="Actual Testing", PI=FALSE) +
labs(title="Forecast Holt vs Testing")
## Warning in ggplot2::geom_line(ggplot2::aes(x = .data[["timeVal"]], y =
## .data[["seriesVal"]], : Ignoring unknown parameters: `PI`
# Akurasi Holt
accuracy_holt <- accuracy(model_holt, ts_testing)
cat("\n=== AKURASI MODEL HOLT ===\n")
##
## === AKURASI MODEL HOLT ===
print(accuracy_holt)
## ME RMSE MAE MPE MAPE MASE
## Training set 5590486 47129340 35856275 -111.07762 154.15083 0.5740707
## Test set 59170573 62704951 59170573 80.32592 80.32592 0.9473403
## ACF1 Theil's U
## Training set 0.03568187 NA
## Test set 0.30017674 2.125483
#9. REGRESI TREN
t <- seq_along(ts_training)
lm_trend <- lm(as.numeric(ts_training) ~ t)
summary(lm_trend)
##
## Call:
## lm(formula = as.numeric(ts_training) ~ t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -72820639 -39235110 -12320695 41268270 90725432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 83708653 14122442 5.927 3.72e-07 ***
## t -807803 501766 -1.610 0.114
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48160000 on 46 degrees of freedom
## Multiple R-squared: 0.05334, Adjusted R-squared: 0.03276
## F-statistic: 2.592 on 1 and 46 DF, p-value: 0.1143
plot(ts_training, main="Regresi Tren", ylab="Denda")
# garis fitted training
lines(t, lm_trend$fitted.values, col="red", lwd=2)
# prediksi future
t_future <- (length(ts_training)+1):(length(ts_training)+length(ts_testing))
pred_trend <- predict(lm_trend, data.frame(t=t_future))
# garis prediksi testing
lines(t_future, pred_trend, col="darkgreen", lwd=2, lty=2)
#10. ARIMA MODEL
auto_fit <- auto.arima(ts_training, stepwise=FALSE, approximation=FALSE)
summary(auto_fit)
## Series: ts_training
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.3561 64314015
## s.e. 0.1351 10179903
##
## sigma^2 = 2.136e+15: log likelihood = -914.31
## AIC=1834.61 AICc=1835.16 BIC=1840.23
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -491164.4 45249023 38082072 -153.5304 183.1281 0.6097065
## ACF1
## Training set -0.02521728
m1 <- Arima(ts_training, order=c(1,1,0))
m2 <- Arima(ts_training, order=c(0,1,1))
m3 <- Arima(ts_training, order=c(1,1,1))
aic_df <- data.frame(
Model=c("ARIMA(1,1,0)","ARIMA(0,1,1)","ARIMA(1,1,1)"),
AIC=c(AIC(m1),AIC(m2),AIC(m3))
)
print(aic_df)
## Model AIC
## 1 ARIMA(1,1,0) 1805.586
## 2 ARIMA(0,1,1) 1799.419
## 3 ARIMA(1,1,1) 1800.475
best_idx <- which.min(aic_df$AIC)
best_model <- list(m1,m2,m3)[[best_idx]]
cat("Model ARIMA terbaik manual:", aic_df$Model[best_idx], "\n")
## Model ARIMA terbaik manual: ARIMA(0,1,1)
checkresiduals(best_model)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 7.3811, df = 9, p-value = 0.5975
##
## Model df: 1. Total lags used: 10
fc_arima <- forecast(best_model, h=length(ts_testing))
autoplot(fc_arima) +
autolayer(ts_testing, series="Actual") +
labs(title="Forecast ARIMA")
acc_arima <- accuracy(fc_arima, ts_testing)
cat("\n=== AKURASI ARIMA ===\n")
##
## === AKURASI ARIMA ===
print(acc_arima)
## ME RMSE MAE MPE MAPE MASE
## Training set -4496131 46979884 36678750 -155.06616 184.33412 0.5872388
## Test set 28241042 39164048 29902878 29.92843 34.50019 0.4787549
## ACF1 Theil's U
## Training set 0.04257315 NA
## Test set 0.47109867 0.996765