library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
# Import and clean the data ----
# NOTE(review): the path below is absolute and machine-specific; adjust it
# before running the script on another computer.
data <- read_excel(
  "C:/Users/HP/Downloads/Data curah hujan 2020-2024 di Bandung (1).xlsx",
  sheet = "Sheet2"
)
# The renaming assumes the sheet has exactly two columns (date, rainfall).
colnames(data) <- c("Tanggal", "Curah_Hujan")
# Drop rows that contain a missing value in either column.
data <- na.omit(data)
data$Tanggal <- as.Date(data$Tanggal)
# Add year (Tahun) and month-name (Bulan) columns, both as character strings,
# then sort chronologically: the ts() conversion further down assigns time
# positions purely by row order, so the rows must be in date order.
data <- data %>%
  mutate(
    Tahun = format(Tanggal, "%Y"),
    Bulan = format(Tanggal, "%B")
  ) %>%
  arrange(Tanggal)
# Split into training data (2020-2023) and testing data (2024).
# Each subset is auto-printed for inspection.
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_train
data_test <- data %>% filter(Tahun == "2024")
data_test
# Descriptive statistics ----
# For the full series, the training subset and the testing subset, print a
# header, the summary() of Curah_Hujan and its standard deviation.
# Wrapped in local() so the loop helpers do not leak into the global workspace.
invisible(local({
  bagian <- list(
    list(
      judul = "===== Statistik Deskriptif Keseluruhan (2020–2024) =====\n",
      nilai = data[["Curah_Hujan"]]
    ),
    list(
      judul = "\n===== Statistik Deskriptif Training (2020–2023) =====\n",
      nilai = data_train[["Curah_Hujan"]]
    ),
    list(
      judul = "\n===== Statistik Deskriptif Testing (2024) =====\n",
      nilai = data_test[["Curah_Hujan"]]
    )
  )
  for (b in bagian) {
    cat(b$judul)
    print(summary(b$nilai))
    cat("\nStandar Deviasi:", sd(b$nilai), "\n")
  }
}))
# Recorded output from a previous run:
## ===== Statistik Deskriptif Keseluruhan (2020–2024) =====
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.0 79.0 166.7 181.1 271.2 512.0
##
## Standar Deviasi: 118.0585
##
## ===== Statistik Deskriptif Training (2020–2023) =====
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 84.25 179.75 180.24 272.25 454.30
##
## Standar Deviasi: 113.0322
##
## ===== Statistik Deskriptif Testing (2024) =====
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30.0 78.5 146.0 184.6 266.0 512.0
##
## Standar Deviasi: 141.9555
# Per-year statistics ----
# Mean, standard deviation, minimum and maximum of Curah_Hujan for each Tahun.
cat("\n===== Statistik Curah Hujan per Tahun =====\n")
##
## ===== Statistik Curah Hujan per Tahun =====
data %>%
  group_by(Tahun) %>%
  summarise(
    Rata_rata = mean(Curah_Hujan),
    SD = sd(Curah_Hujan),
    Minimum = min(Curah_Hujan),
    Maksimum = max(Curah_Hujan)
  ) %>%
  print()
## # A tibble: 5 × 5
## Tahun Rata_rata SD Minimum Maksimum
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2020 202. 115. 30 337
## 2 2021 181. 114. 33.2 454.
## 3 2022 193. 111. 29.9 367.
## 4 2023 146. 118. 18 365
## 5 2024 185. 142. 30 512
# Data visualisation ----
# (a) Time-series plot of rainfall against date: a black line, dark-red points
# and an orange LOESS trend curve drawn without a confidence band.
ggplot(data = data, mapping = aes(x = Tanggal, y = Curah_Hujan)) +
  geom_line(colour = "black", linewidth = 0.5) +
  geom_point(colour = "darkred", size = 1.5) +
  geom_smooth(method = "loess", se = FALSE, colour = "orange", linewidth = 1) +
  labs(
    title = "Curah Hujan Bulanan di Kota Bandung (2020–2024)",
    x = "Tahun",
    y = "Curah Hujan (mm)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# (b) Box plot of rainfall for each year; boxes are coloured by year and the
# redundant legend is hidden.
ggplot(data = data, mapping = aes(x = Tahun, y = Curah_Hujan, fill = Tahun)) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Boxplot Curah Hujan per Tahun (2020–2024)",
    x = "Tahun",
    y = "Curah Hujan (mm)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# (c) Single box plot of all rainfall observations.
ggplot(data = data, mapping = aes(y = Curah_Hujan)) +
  geom_boxplot(fill = "steelblue", alpha = 0.8) +
  labs(
    title = "Boxplot Seluruh Data Curah Hujan Bandung (2020–2024)",
    y = "Curah Hujan (mm)"
  ) +
  theme_minimal()

# (d) Box plot comparing the training and testing periods.
# Label each row by period: year 2024 is testing, every other year is training.
data <- data %>%
  mutate(
    Periode = ifelse(Tahun == "2024", "Testing (2024)", "Training (2020–2023)")
  )
ggplot(data = data, mapping = aes(x = Periode, y = Curah_Hujan, fill = Periode)) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Boxplot Curah Hujan Training dan Testing",
    x = "",
    y = "Curah Hujan (mm)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# (e) Box plot of rainfall for each calendar month.
# Bulan is a character column, which ggplot2 lays out alphabetically on a
# discrete axis. For this plot only, convert it to a factor whose levels are
# ordered by the month number taken from Tanggal, so the boxes appear in
# calendar order regardless of the locale used for the month names.
data %>%
  mutate(Bulan = reorder(Bulan, as.integer(format(Tanggal, "%m")))) %>%
  ggplot(aes(x = Bulan, y = Curah_Hujan, fill = Bulan)) +
  geom_boxplot(alpha = 0.8) +
  ggtitle("Boxplot Curah Hujan per Bulan (2020–2024)") +
  xlab("Bulan") +
  ylab("Curah Hujan (mm)") +
  theme_minimal() +
  theme(
    # Slant the month labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# ACF and PACF plots of the training data ----
# Convert the training rainfall values to a monthly time series (frequency 12)
# starting in January 2020. ts() assigns time positions by row order, so
# data_train must be in chronological order with no missing months.
train_ts <- ts(data_train$Curah_Hujan, start = c(2020, 1), frequency = 12)
train_ts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2020 207.6 337.0 291.0 271.0 292.0 30.0 64.0 42.0 88.0 327.0 207.0 262.0
## 2021 146.4 153.9 292.5 177.3 239.0 92.4 33.2 91.8 73.0 218.4 454.3 198.5
## 2022 59.5 117.1 238.9 336.2 146.9 150.6 98.5 29.9 182.2 366.7 307.2 277.7
## 2023 67.0 111.0 200.0 276.0 269.0 90.0 24.0 30.0 18.0 62.0 239.0 365.0
# Draw the ACF and PACF side by side. par() returns the previous settings, so
# keep them and restore them afterwards instead of assuming the layout was 1 x 1.
old_par <- par(mfrow = c(1, 2))
acf(train_ts, main = "Plot ACF Curah Hujan")
pacf(train_ts, main = "Plot PACF Curah Hujan")

par(old_par)
# Stationarity test (ADF test) ----
# Run the Augmented Dickey-Fuller test on the training series and print it.
cat("\n===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====\n")
##
## ===== UJI STASIONERITAS (ADF TEST) DATA TRAINING =====
adf_result <- adf.test(train_ts)
## Warning in adf.test(train_ts): p-value smaller than printed p-value
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: train_ts
## Dickey-Fuller = -5.402, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# If the series is not stationary (p-value > 0.05), difference it once and
# inspect the differenced series; otherwise just report that it is stationary.
if (adf_result$p.value > 0.05) {
  cat("\nData tidak stasioner. Melakukan differencing 1x...\n")
  diff_train <- diff(train_ts)
  # Plot the differenced series. The plot must be print()ed explicitly: inside
  # the braces only the value of the whole `if` expression is auto-printed, so
  # a ggplot object built in the middle of the block would never be drawn.
  print(
    autoplot(diff_train) +
      ggtitle("Data Training Setelah Differencing (1x)") +
      xlab("Tahun") +
      ylab("Perubahan Curah Hujan (mm)") +
      theme_minimal()
  )
  # ACF and PACF of the differenced series, side by side; the previous
  # graphics settings are restored afterwards.
  old_par <- par(mfrow = c(1, 2))
  acf(diff_train, main = "ACF Setelah Differencing (1x)")
  pacf(diff_train, main = "PACF Setelah Differencing (1x)")
  par(old_par)
} else {
  cat("\nData sudah stasioner, tidak perlu differencing.\n")
}
##
## Data sudah stasioner, tidak perlu differencing.