library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# 1️⃣ Import dan pembersihan data
data <- read_excel("C:/Users/HP/Downloads/Data curah hujan 2020-2024 di Bandung (1).xlsx",
sheet = "Sheet2")
colnames(data) <- c("Tanggal", "Curah_Hujan")
data <- na.omit(data)
data$Tanggal <- as.Date(data$Tanggal)
# Tambah kolom Tahun dan Bulan
data <- data %>%
mutate(
Tahun = format(Tanggal, "%Y"),
Bulan = format(Tanggal, "%B")
)
# 2️⃣ Pemisahan Data Training (2020–2023) & Testing (2024)
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_test <- data %>% filter(Tahun == "2024")
# 3️⃣ Statistik Deskriptif
cat("===== Statistik Deskriptif Keseluruhan (2020–2024) =====\n")
## ===== Statistik Deskriptif Keseluruhan (2020–2024) =====
print(summary(data$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.0 79.0 166.7 181.1 271.2 512.0
cat("\nStandar Deviasi:", sd(data$Curah_Hujan), "\n")
##
## Standar Deviasi: 118.0585
cat("\n===== Statistik Deskriptif Training (2020–2023) =====\n")
##
## ===== Statistik Deskriptif Training (2020–2023) =====
print(summary(data_train$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 84.25 179.75 180.24 272.25 454.30
cat("\nStandar Deviasi:", sd(data_train$Curah_Hujan), "\n")
##
## Standar Deviasi: 113.0322
cat("\n===== Statistik Deskriptif Testing (2024) =====\n")
##
## ===== Statistik Deskriptif Testing (2024) =====
print(summary(data_test$Curah_Hujan))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 78.5 146.0 184.6 266.0 512.0
cat("\nStandar Deviasi:", sd(data_test$Curah_Hujan), "\n")
##
## Standar Deviasi: 141.9555
# Statistik per tahun
print(
data %>%
group_by(Tahun) %>%
summarise(
Rata_rata = mean(Curah_Hujan),
SD = sd(Curah_Hujan),
Minimum = min(Curah_Hujan),
Maksimum = max(Curah_Hujan)
)
)
## # A tibble: 5 × 5
## Tahun Rata_rata SD Minimum Maksimum
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2020 202. 115. 30 337
## 2 2021 181. 114. 33.2 454.
## 3 2022 193. 111. 29.9 367.
## 4 2023 146. 118. 18 365
## 5 2024 185. 142. 30 512
# 4️⃣ Visualisasi Data
# a. Time Series Plot dengan Garis Tren
ggplot(data, aes(x = Tanggal, y = Curah_Hujan)) +
geom_line(color = "black", linewidth = 0.5) +
geom_point(color = "darkred", size = 1.5) +
geom_smooth(method = "loess", se = FALSE, color = "orange", linewidth = 1) +
ggtitle("Curah Hujan Bulanan di Kota Bandung (2020–2024)") +
xlab("Tahun") +
ylab("Curah Hujan (mm)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# b. Boxplot Keseluruhan per Tahun
ggplot(data, aes(x = Tahun, y = Curah_Hujan, fill = Tahun)) +
geom_boxplot(alpha = 0.7) +
ggtitle("Boxplot Curah Hujan per Tahun (2020–2024)") +
xlab("Tahun") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(legend.position = "none")

# c. Boxplot Seluruh Data
ggplot(data, aes(y = Curah_Hujan)) +
geom_boxplot(fill = "steelblue", alpha = 0.8) +
ggtitle("Boxplot Seluruh Data Curah Hujan Bandung (2020–2024)") +
ylab("Curah Hujan (mm)") +
theme_minimal()

# d. Boxplot Training dan Testing
data$Periode <- ifelse(data$Tahun == "2024", "Testing (2024)", "Training (2020–2023)")
ggplot(data, aes(x = Periode, y = Curah_Hujan, fill = Periode)) +
geom_boxplot(alpha = 0.7) +
ggtitle("Boxplot Curah Hujan Training dan Testing") +
xlab("") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(legend.position = "none")

# e. Boxplot per Bulan (urut sesuai kalender)
data$Bulan <- factor(
data$Bulan,
levels = c(
"January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"
),
ordered = TRUE
)
ggplot(data, aes(x = Bulan, y = Curah_Hujan, fill = Bulan)) +
geom_boxplot(alpha = 0.8) +
ggtitle("Boxplot Curah Hujan per Bulan (2020–2024)") +
xlab("Bulan") +
ylab("Curah Hujan (mm)") +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)

# 5️⃣ PLOT ACF DAN PACF DATA TRAINING
# Konversi data training ke time series
train_ts <- ts(data_train$Curah_Hujan, start = c(2020, 1), frequency = 12)
test_ts <- ts(data_test$Curah_Hujan, start = c(2024, 1), frequency = 12)
# Plot ACF & PACF untuk data training
par(mfrow = c(1, 2))
acf(train_ts, main = "Plot ACF Curah Hujan")
pacf(train_ts, main = "Plot PACF Curah Hujan")

par(mfrow = c(1, 1))
# 6️⃣ UJI STASIONERITAS (ADF TEST)
cat("\n===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====\n")
##
## ===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====
adf_result <- adf.test(train_ts)
## Warning in adf.test(train_ts): p-value smaller than printed p-value
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: train_ts
## Dickey-Fuller = -5.402, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# Jika data tidak stasioner (p-value > 0.05), lakukan differencing
if (adf_result$p.value > 0.05) {
cat("\nData tidak stasioner. Melakukan differencing 1x...\n")
diff_train <- diff(train_ts)
# Plot data setelah differencing
autoplot(diff_train) +
ggtitle("Data Training Setelah Differencing (1x)") +
xlab("Tahun") + ylab("Perubahan Curah Hujan (mm)") +
theme_minimal()
# Plot ACF & PACF setelah differencing
par(mfrow = c(1, 2))
acf(diff_train, main = "ACF Setelah Differencing (1x)")
pacf(diff_train, main = "PACF Setelah Differencing (1x)")
par(mfrow = c(1, 1))
} else {
cat("\nData sudah stasioner, tidak perlu differencing.\n")
}
##
## Data sudah stasioner, tidak perlu differencing.
# 7️⃣ IDENTIFIKASI TIME SERIES
# Plot time series data training
autoplot(train_ts) +
ggtitle("Plot Time Series Curah Hujan Bandung (Training 2020–2023)") +
xlab("Tahun") + ylab("Curah Hujan (mm)") +
theme_minimal()

# Dekomposisi data
dekomposisi <- decompose(train_ts, type = "additive")
autoplot(dekomposisi) +
ggtitle("Dekomposisi Time Series Curah Hujan (Additive Model)") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the forecast package.
## Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Boxplot musiman
boxplot(Curah_Hujan ~ Bulan, data = data_train,
main = "Pola Musiman Curah Hujan (2020–2023)",
xlab = "Bulan", ylab = "Curah Hujan (mm)",
col = "skyblue")

# 8️⃣ PEMODELAN EXPONENTIAL SMOOTHING TRIPLE (HOLT-WINTER)
# Model Holt-Winters (Additive)
hw_model <- HoltWinters(train_ts, seasonal = "additive")
# Lihat hasil parameter smoothing
hw_model
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train_ts, seasonal = "additive")
##
## Smoothing parameters:
## alpha: 0
## beta : 0
## gamma: 0.2529663
##
## Coefficients:
## [,1]
## a 140.603676
## b -1.281949
## s1 -57.971796
## s2 -31.353429
## s3 91.667702
## s4 67.886407
## s5 63.113306
## s6 -65.725114
## s7 -118.162011
## s8 -125.515738
## s9 -79.345660
## s10 89.162344
## s11 109.264516
## s12 122.965327
# Plot hasil fitting
plot(hw_model, main = "Model Holt-Winters (Additive) Curah Hujan Bandung",
ylab = "Curah Hujan (mm)", xlab = "Waktu")

# Peramalan 12 bulan ke depan (2024)
hw_forecast <- forecast(hw_model, h = 12)
# Plot hasil peramalan
plot(hw_forecast, main = "Peramalan Curah Hujan 2024 (Holt-Winters Additive)",
ylab = "Curah Hujan (mm)", xlab = "Tahun")

# Akurasi model Holt-Winters
accuracy(hw_forecast, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.245788 83.2868 59.86385 -20.53533 51.12713 0.7038666
## Test set 46.813507 142.8309 116.88674 -3.70363 83.21212 1.3743296
## ACF1 Theil's U
## Training set -0.09587046 NA
## Test set -0.34781788 0.7602152
# 9️⃣PEMODELAN REGRESI TIME SERIES (MUSIMAN)
# Model regresi time series dengan musiman
reg_model <- tslm(train_ts ~ season)
# Ringkasan model
summary(reg_model)
##
## Call:
## tslm(formula = train_ts ~ season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -181.525 -53.744 -1.525 38.519 157.250
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 120.12 38.49 3.121 0.00354 **
## season2 59.63 54.43 1.095 0.28058
## season3 135.47 54.43 2.489 0.01757 *
## season4 145.00 54.43 2.664 0.01148 *
## season5 116.60 54.43 2.142 0.03901 *
## season6 -29.38 54.43 -0.540 0.59272
## season7 -65.20 54.43 -1.198 0.23878
## season8 -71.70 54.43 -1.317 0.19605
## season9 -29.83 54.43 -0.548 0.58709
## season10 123.40 54.43 2.267 0.02948 *
## season11 181.75 54.43 3.339 0.00196 **
## season12 155.68 54.43 2.860 0.00700 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 76.97 on 36 degrees of freedom
## Multiple R-squared: 0.6448, Adjusted R-squared: 0.5363
## F-statistic: 5.941 on 11 and 36 DF, p-value: 2.058e-05
# Plot fitting model
plot(train_ts, main = "Fitting Regresi Time Series Curah Hujan (Training)")
lines(fitted(reg_model), col = "red")

# Peramalan 12 bulan ke depan
reg_forecast <- forecast(reg_model, h = 12)
# Plot hasil peramalan
plot(reg_forecast, main = "Peramalan Regresi Time Series Curah Hujan 2024",
ylab = "Curah Hujan (mm)", xlab = "Tahun")

# Akurasi model regresi time series
accuracy(reg_forecast, test_ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 4.290116e-15 66.66049 51.07812 -27.83304 48.99192 0.6005658
## Test set 4.339583e+00 127.66896 102.52292 -43.30994 81.17429 1.2054429
## ACF1 Theil's U
## Training set 0.1096094 NA
## Test set -0.3384045 0.5523028
# 🔟 PEMODELAN SARIMA
# Model Linear
fit <- lm(train_ts ~ 1)
fit
##
## Call:
## lm(formula = train_ts ~ 1)
##
## Coefficients:
## (Intercept)
## 180.2
# BOX COX
bc <- boxcox(fit,
lambda = seq(-2, 2, by = 0.1), # rentang lambda
plotit = TRUE) # grafik

# Ambil lambda optimal dan CI 95%
lambda_opt <- bc$x[which.max(bc$y)]
ci_95 <- bc$x[bc$y > max(bc$y) - 0.5 * qchisq(0.95, df = 1)]
cat("Lambda optimal =", lambda_opt, "\n")
## Lambda optimal = 0.5050505
cat("CI 95% =", range(ci_95), "\n")
## CI 95% = 0.1818182 0.9090909
# BOX–COX TRANSFORMATION
# Cari lambda optimal (metode Guerrero)
lambda_opt <- BoxCox.lambda(train_ts, method = "guerrero")
lambda_opt
## [1] 1.133134
# Transformasi data
train_ts_bc <- BoxCox(train_ts, lambda_opt)
# Plot perbandingan
par(mfrow = c(2,1))
plot(train_ts, main = "Data Asli", ylab = "Curah Hujan")
plot(train_ts_bc, main = paste("Data Setelah Box–Cox (lambda =", round(lambda_opt,3), ")"))

par(mfrow = c(1,1))
# UJI STASIONERITAS NON-SEASONAL (ADF)
adf_before <- adf.test(train_ts) # sebelum Box–Cox
## Warning in adf.test(train_ts): p-value smaller than printed p-value
adf_after <- adf.test(train_ts_bc) # setelah Box–Cox
## Warning in adf.test(train_ts_bc): p-value smaller than printed p-value
print(adf_before)
##
## Augmented Dickey-Fuller Test
##
## data: train_ts
## Dickey-Fuller = -5.402, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
print(adf_after)
##
## Augmented Dickey-Fuller Test
##
## data: train_ts_bc
## Dickey-Fuller = -5.3867, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
train_season_diff <- diff(train_ts_bc, lag = 12, differences = 1)
adf.test(train_season_diff)
##
## Augmented Dickey-Fuller Test
##
## data: train_season_diff
## Dickey-Fuller = -2.6746, Lag order = 3, p-value = 0.3107
## alternative hypothesis: stationary
par(mfrow = c(1,2))
acf(train_ts_bc, main = "ACF Setelah Box-Cox")
pacf(train_ts_bc, main = "PACF Setelah Box-Cox")

par(mfrow = c(1,1))
# Karena 0.3107 > 0.05 maka tolak H0, sehingga tidak pakai D = 1
# MODEL SARIMA D = 0
final_model <- Arima(train_ts_bc,
order = c(1,0,1),
seasonal = c(0,0,0)) # no seasonal component
summary(final_model)
## Series: train_ts_bc
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## -0.2572 0.7840 328.2016
## s.e. 0.3189 0.2517 39.7981
##
## sigma^2 = 40656: log likelihood = -321.56
## AIC=651.11 AICc=652.04 BIC=658.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.2080451 195.2314 168.1958 -103.8131 135.1982 0.9850177
## ACF1
## Training set 0.02989791
# Forecast 12 Bulan ke depan
forecast_final <- forecast(final_model, h = 12)
plot(forecast_final, main="Peramalan 2024 (SARIMA Final)")
