library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
# ⿥ Import dan pembersihan data
data <- read_excel("C:/Users/HP/Downloads/Data curah hujan 2020-2024 di Bandung (1).xlsx",
sheet = "Sheet2")
colnames(data) <- c("Tanggal", "Curah_Hujan")
data <- na.omit(data)
data$Tanggal <- as.Date(data$Tanggal)
# Tambah kolom Tahun dan Bulan
data <- data %>%
mutate(
Tahun = format(Tanggal, "%Y"),
Bulan = format(Tanggal, "%B")
)
# ⿢ Pemisahan Data Training (2020â2023) & Testing (2024)
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_train
## # A tibble: 48 Ă 4
## Tanggal Curah_Hujan Tahun Bulan
## <date> <dbl> <chr> <chr>
## 1 2020-01-01 208. 2020 January
## 2 2020-02-01 337 2020 February
## 3 2020-03-01 291 2020 March
## 4 2020-04-01 271 2020 April
## 5 2020-05-01 292 2020 May
## 6 2020-06-01 30 2020 June
## 7 2020-07-01 64 2020 July
## 8 2020-08-01 42 2020 August
## 9 2020-09-01 88 2020 September
## 10 2020-10-01 327 2020 October
## # âš 38 more rows
data_test <- data %>% filter(Tahun == "2024")
data_test
## # A tibble: 12 Ă 4
## Tanggal Curah_Hujan Tahun Bulan
## <date> <dbl> <chr> <chr>
## 1 2024-01-01 335 2024 January
## 2 2024-02-01 264 2024 February
## 3 2024-03-01 272 2024 March
## 4 2024-04-01 215 2024 April
## 5 2024-05-01 71 2024 May
## 6 2024-06-01 156 2024 June
## 7 2024-07-01 30 2024 July
## 8 2024-08-01 53 2024 August
## 9 2024-09-01 136 2024 September
## 10 2024-10-01 81 2024 October
## 11 2024-11-01 512 2024 November
## 12 2024-12-01 90 2024 December
# ⿣ Statistik Deskriptif
cat("===== Statistik Deskriptif Keseluruhan (2020â2024) =====\n")
## ===== Statistik Deskriptif Keseluruhan (2020â2024) =====
print(summary(data$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.0 79.0 166.7 181.1 271.2 512.0
cat("\nStandar Deviasi:", sd(data$Curah_Hujan), "\n")
##
## Standar Deviasi: 118.0585
cat("\n===== Statistik Deskriptif Training (2020â2023) =====\n")
##
## ===== Statistik Deskriptif Training (2020â2023) =====
print(summary(data_train$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 84.25 179.75 180.24 272.25 454.30
cat("\nStandar Deviasi:", sd(data_train$Curah_Hujan), "\n")
##
## Standar Deviasi: 113.0322
cat("\n===== Statistik Deskriptif Testing (2024) =====\n")
##
## ===== Statistik Deskriptif Testing (2024) =====
print(summary(data_test$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 78.5 146.0 184.6 266.0 512.0
cat("\nStandar Deviasi:", sd(data_test$Curah_Hujan), "\n")
##
## Standar Deviasi: 141.9555
# Statistik per tahun
cat("\n===== Statistik Curah Hujan per Tahun =====\n")
##
## ===== Statistik Curah Hujan per Tahun =====
print(
data %>%
group_by(Tahun) %>%
summarise(
Rata_rata = mean(Curah_Hujan),
SD = sd(Curah_Hujan),
Minimum = min(Curah_Hujan),
Maksimum = max(Curah_Hujan)
)
)
## # A tibble: 5 Ă 5
## Tahun Rata_rata SD Minimum Maksimum
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2020 202. 115. 30 337
## 2 2021 181. 114. 33.2 454.
## 3 2022 193. 111. 29.9 367.
## 4 2023 146. 118. 18 365
## 5 2024 185. 142. 30 512
# ⿤ Visualisasi Data
# a. Time Series Plot dengan Garis Tren
ggplot(data, aes(x = Tanggal, y = Curah_Hujan)) +
geom_line(color = "black", linewidth = 0.5) +
geom_point(color = "darkred", size = 1.5) +
geom_smooth(method = "loess", se = FALSE, color = "orange", linewidth = 1) +
ggtitle("Curah Hujan Bulanan di Kota Bandung (2020â2024)") +
xlab("Tahun") +
ylab("Curah Hujan (mm)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# b. Boxplot Keseluruhan per Tahun
ggplot(data, aes(x = Tahun, y = Curah_Hujan, fill = Tahun)) +
geom_boxplot(alpha = 0.7) +
ggtitle("Boxplot Curah Hujan per Tahun (2020â2024)") +
xlab("Tahun") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(legend.position = "none")

# c. Boxplot Seluruh Data
ggplot(data, aes(y = Curah_Hujan)) +
geom_boxplot(fill = "steelblue", alpha = 0.8) +
ggtitle("Boxplot Seluruh Data Curah Hujan Bandung (2020â2024)") +
ylab("Curah Hujan (mm)") +
theme_minimal()

# d. Boxplot Training dan Testing
data$Periode <- ifelse(data$Tahun == "2024", "Testing (2024)", "Training (2020â2023)")
ggplot(data, aes(x = Periode, y = Curah_Hujan, fill = Periode)) +
geom_boxplot(alpha = 0.7) +
ggtitle("Boxplot Curah Hujan Training dan Testing") +
xlab("") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(legend.position = "none")

# e. Boxplot per Bulan (urut sesuai kalender)
data$Bulan <- factor(
data$Bulan,
levels = c(
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
),
ordered = TRUE
)
ggplot(data, aes(x = Bulan, y = Curah_Hujan, fill = Bulan)) +
geom_boxplot(alpha = 0.8) +
ggtitle("Boxplot Curah Hujan per Bulan (2020â2024)") +
xlab("Bulan") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)

# ⿼ PLOT ACF DAN PACF DATA TRAINING
# Konversi data training ke time series
train_ts <- ts(data_train$Curah_Hujan, start = c(2020, 1), frequency = 12)
# Plot ACF & PACF untuk data training
par(mfrow = c(1, 2))
acf(train_ts, main = "Plot ACF Curah Hujan")
pacf(train_ts, main = "Plot PACF Curah Hujan")

par(mfrow = c(1, 1))
# ⿌ UJI STASIONERITAS (ADF TEST)
cat("\n===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====\n")
##
## ===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====
adf_result <- adf.test(train_ts)
## Warning in adf.test(train_ts): p-value smaller than printed p-value
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: train_ts
## Dickey-Fuller = -5.402, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# Jika data tidak stasioner (p-value > 0.05), lakukan differencing
if (adf_result$p.value > 0.05) {
cat("\nData tidak stasioner. Melakukan differencing 1x...\n")
diff_train <- diff(train_ts)
# Plot data setelah differencing
autoplot(diff_train) +
ggtitle("Data Training Setelah Differencing (1x)") +
xlab("Tahun") + ylab("Perubahan Curah Hujan (mm)") +
theme_minimal()
# Plot ACF & PACF setelah differencing
par(mfrow = c(1, 2))
acf(diff_train, main = "ACF Setelah Differencing (1x)")
pacf(diff_train, main = "PACF Setelah Differencing (1x)")
par(mfrow = c(1, 1))
} else {
cat("\nData sudah stasioner, tidak perlu differencing.\n")
}
##
## Data sudah stasioner, tidak perlu differencing.
# âż§ IDENTIFIKASI TIME SERIES
# Plot time series data training
autoplot(train_ts) +
ggtitle("Plot Time Series Curah Hujan Bandung (Training 2020â2023)") +
xlab("Tahun") + ylab("Curah Hujan (mm)") +
theme_minimal()

# Dekomposisi data
dekomposisi <- decompose(train_ts, type = "additive")
autoplot(dekomposisi) +
ggtitle("Dekomposisi Time Series Curah Hujan (Additive Model)") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## âš Please use `linewidth` instead.
## âš The deprecated feature was likely used in the forecast package.
## Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Boxplot musiman
boxplot(Curah_Hujan ~ Bulan, data = data_train,
main = "Pola Musiman Curah Hujan (2020â2023)",
xlab = "Bulan", ylab = "Curah Hujan (mm)",
col = "skyblue")

# ⿨ PEMODELAN EXPONENTIAL SMOOTHING TRIPLE (HOLT-WINTER)
# Model Holt-Winters (Additive)
hw_model <- HoltWinters(train_ts, seasonal = "additive")
# Lihat hasil parameter smoothing
hw_model
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train_ts, seasonal = "additive")
##
## Smoothing parameters:
## alpha: 0
## beta : 0
## gamma: 0.2529663
##
## Coefficients:
## [,1]
## a 140.603676
## b -1.281949
## s1 -57.971796
## s2 -31.353429
## s3 91.667702
## s4 67.886407
## s5 63.113306
## s6 -65.725114
## s7 -118.162011
## s8 -125.515738
## s9 -79.345660
## s10 89.162344
## s11 109.264516
## s12 122.965327
# Konversi data testing (2024) menjadi objek time series
test_ts <- ts(data_test$Curah_Hujan, start = c(2024, 1), frequency = 12)
# Plot hasil fitting
plot(hw_model, main = "Model Holt-Winters (Additive) Curah Hujan Bandung",
ylab = "Curah Hujan (mm)", xlab = "Waktu")

# Peramalan 12 bulan ke depan (2024)
hw_forecast <- forecast(hw_model, h = 12)
# Plot hasil peramalan
plot(hw_forecast, main = "Peramalan Curah Hujan 2024 (Holt-Winters Additive)",
ylab = "Curah Hujan (mm)", xlab = "Tahun")

# Akurasi model Holt-Winters
accuracy(hw_forecast, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.245788 83.2868 59.86385 -20.53533 51.12713 0.7038666
## Test set 46.813507 142.8309 116.88674 -3.70363 83.21212 1.3743296
## ACF1 Theil's U
## Training set -0.09587046 NA
## Test set -0.34781788 0.7602152
# ⿊PEMODELAN REGRESI TIME SERIES (MUSIMAN)
# Model regresi time series dengan trend dan musiman
reg_model <- tslm(train_ts ~ season)
# Ringkasan model
summary(reg_model)
##
## Call:
## tslm(formula = train_ts ~ season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -181.525 -53.744 -1.525 38.519 157.250
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 120.12 38.49 3.121 0.00354 **
## season2 59.63 54.43 1.095 0.28058
## season3 135.47 54.43 2.489 0.01757 *
## season4 145.00 54.43 2.664 0.01148 *
## season5 116.60 54.43 2.142 0.03901 *
## season6 -29.38 54.43 -0.540 0.59272
## season7 -65.20 54.43 -1.198 0.23878
## season8 -71.70 54.43 -1.317 0.19605
## season9 -29.83 54.43 -0.548 0.58709
## season10 123.40 54.43 2.267 0.02948 *
## season11 181.75 54.43 3.339 0.00196 **
## season12 155.68 54.43 2.860 0.00700 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.97 on 36 degrees of freedom
## Multiple R-squared: 0.6448, Adjusted R-squared: 0.5363
## F-statistic: 5.941 on 11 and 36 DF, p-value: 2.058e-05
# Plot fitting model
plot(train_ts, main = "Fitting Regresi Time Series Curah Hujan (Training)")
lines(fitted(reg_model), col = "red")

# Peramalan 12 bulan ke depan
reg_forecast <- forecast(reg_model, h = 12)
# Plot hasil peramalan
plot(reg_forecast, main = "Peramalan Regresi Time Series Curah Hujan 2024",
ylab = "Curah Hujan (mm)", xlab = "Tahun")

# Akurasi model regresi time series
accuracy(reg_forecast, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 4.290116e-15 66.66049 51.07812 -27.83304 48.99192 0.6005658
## Test set 4.339583e+00 127.66896 102.52292 -43.30994 81.17429 1.2054429
## ACF1 Theil's U
## Training set 0.1096094 NA
## Test set -0.3384045 0.5523028
# đ PEMODELAN SARIMA (SEASONAL ARIMA)
# Pemodelan SARIMA otomatis
sarima_model <- auto.arima(train_ts, seasonal = TRUE, stepwise = FALSE, approximation = FALSE)
summary(sarima_model)
## Series: train_ts
## ARIMA(0,0,0)(0,1,1)[12]
##
## Coefficients:
## sma1
## -0.7243
## s.e. 0.5088
##
## sigma^2 = 7737: log likelihood = -215.73
## AIC=435.46 AICc=435.83 BIC=438.63
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -14.64359 75.11008 53.24365 -38.25788 55.80402 0.6260277 0.0444806
# Diagnostik residual SARIMA
checkresiduals(sarima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0)(0,1,1)[12]
## Q* = 9.0125, df = 9, p-value = 0.4361
##
## Model df: 1. Total lags used: 10
# Peramalan 12 bulan ke depan
sarima_forecast <- forecast(sarima_model, h = 12)
# Plot hasil peramalan
plot(sarima_forecast, main = "Peramalan Curah Hujan 2024 (Model SARIMA)",
ylab = "Curah Hujan (mm)", xlab = "Tahun")

# Akurasi model SARIMA
accuracy(sarima_forecast, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -14.643587 75.11008 53.24365 -38.25788 55.80402 0.6260277
## Test set 8.138647 130.30404 105.40688 -40.52971 81.39865 1.2393520
## ACF1 Theil's U
## Training set 0.0444806 NA
## Test set -0.2989930 0.5564026
# 1⿥ PERBANDINGAN KINERJA ANTAR MODEL
# Ambil nilai RMSE, MAE, dan MAPE dari setiap model
akurasi_hw <- accuracy(hw_forecast, test_ts)[2, c("RMSE", "MAE", "MAPE")]
akurasi_reg <- accuracy(reg_forecast, test_ts)[2, c("RMSE", "MAE", "MAPE")]
akurasi_sarima <- accuracy(sarima_forecast, test_ts)[2, c("RMSE", "MAE", "MAPE")]
# Gabungkan hasil ke dalam satu tabel
akurasi_model <- rbind(
Holt_Winters_Additive = akurasi_hw,
Regresi_TimeSeries = akurasi_reg,
SARIMA = akurasi_sarima
)
print("đ Perbandingan Akurasi Model Peramalan Curah Hujan Tahun 2024:")
## [1] "đ Perbandingan Akurasi Model Peramalan Curah Hujan Tahun 2024:"
print(akurasi_model)
## RMSE MAE MAPE
## Holt_Winters_Additive 142.8309 116.8867 83.21212
## Regresi_TimeSeries 127.6690 102.5229 81.17429
## SARIMA 130.3040 105.4069 81.39865
# Visualisasi perbandingan MAPE
barplot(akurasi_model[, "MAPE"],
names.arg = rownames(akurasi_model),
col = c("skyblue", "salmon", "lightgreen"),
main = "Perbandingan MAPE Model Peramalan Curah Hujan 2024",
ylab = "MAPE (%)")
