library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
library(zoo)
## Warning: package 'zoo' was built under R version 4.5.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
data_raw <- read_excel(
"C:/Users/ASUS/Downloads/Topik Dalam Statistika/laporan_iklim_harian-Kobar.xlsx",
skip = 7
)
head(data_raw)
## # A tibble: 6 × 2
## TANGGAL RR
## <chr> <dbl>
## 1 01-01-2025 0
## 2 02-01-2025 3.5
## 3 03-01-2025 2.6
## 4 04-01-2025 0
## 5 05-01-2025 15.4
## 6 06-01-2025 0
str(data_raw)
## tibble [372 × 2] (S3: tbl_df/tbl/data.frame)
## $ TANGGAL: chr [1:372] "01-01-2025" "02-01-2025" "03-01-2025" "04-01-2025" ...
## $ RR : num [1:372] 0 3.5 2.6 0 15.4 ...
summary(data_raw)
## TANGGAL RR
## Length:372 Min. : 0.0
## Class :character 1st Qu.: 0.0
## Mode :character Median : 4.5
## Mean : 741.7
## 3rd Qu.: 21.0
## Max. :8888.0
## NA's :7
#pembersihan data
# hapus kolom kosong
data_raw <- data_raw[, 1:2]
# mengubah nama kolom
colnames(data_raw) <- c("Tanggal", "Curah_Hujan")
# mengubah tanggal menjadi format Date
data_raw$Tanggal <- as.Date(
data_raw$Tanggal,
format = "%d-%m-%Y"
)
# mengubah data curah hujan menjadi numerik
data_raw$Curah_Hujan <- as.numeric(data_raw$Curah_Hujan)
# mengecek jumlah data 8888 dan 9999
sum(data_raw$Curah_Hujan == 8888, na.rm = TRUE)
## [1] 30
sum(data_raw$Curah_Hujan == 9999, na.rm = TRUE)
## [1] 0
# mengganti nilai 8888 dan 9999 menjadi NA
data_raw$Curah_Hujan[
data_raw$Curah_Hujan %in% c(8888, 9999)
] <- NA
# menghitung rata-rata tanpa NA
rata_rata <- mean(
data_raw$Curah_Hujan,
na.rm = TRUE
)
rata_rata
## [1] 12.19045
# mengganti NA dengan rata-rata
data_raw$Curah_Hujan[
is.na(data_raw$Curah_Hujan)
] <- rata_rata
# mengurutkan data berdasarkan tanggal
data_raw <- data_raw[
order(data_raw$Tanggal),
]
# mengecek missing value
sum(is.na(data_raw$Curah_Hujan))
## [1] 0
head(data_raw)
## # A tibble: 6 × 2
## Tanggal Curah_Hujan
## <date> <dbl>
## 1 2025-01-01 0
## 2 2025-01-02 3.5
## 3 2025-01-03 2.6
## 4 2025-01-04 0
## 5 2025-01-05 15.4
## 6 2025-01-06 0
#buat time series dan uji stasioneritas
ts_data <- ts(data_raw$Curah_Hujan)
ts.plot(
ts_data,
main = "Plot Data Curah Hujan",
ylab = "Curah Hujan (mm)",
xlab = "Waktu",
col = "blue"
)

adf_awal <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
adf_awal
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -5.9556, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#differencing
diff1 <- diff(ts_data)
ts.plot(
diff1,
main = "Differencing Orde 1",
ylab = "Differencing",
xlab = "Waktu",
col = "red"
)

adf_diff <- adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
adf_diff
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -10.832, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#plot acf dan pacf
acf(
diff1,
main = "Plot ACF"
)

pacf(
diff1,
main = "Plot PACF"
)

#splitting data
# Jumlah total data
n <- length(ts_data)
# Menentukan ukuran data training (80%)
train_size <- floor(0.8 * n)
# Membagi data
train <- ts_data[1:train_size]
test <- ts_data[(train_size + 1):n]
# Melihat jumlah data
length(train)
## [1] 297
length(test)
## [1] 75
# Persentase data
cat("Jumlah Data Training :", length(train), "\n")
## Jumlah Data Training : 297
cat("Jumlah Data Testing :", length(test), "\n")
## Jumlah Data Testing : 75
cat("Persentase Training :",
round(length(train)/n * 100, 2), "%\n")
## Persentase Training : 79.84 %
cat("Persentase Testing :",
round(length(test)/n * 100, 2), "%\n")
## Persentase Testing : 20.16 %
#pemodelan arima
model_arima <- auto.arima(
train,
seasonal = FALSE
)
summary(model_arima)
## Series: train
## ARIMA(0,0,0) with non-zero mean
##
## Coefficients:
## mean
## 11.5925
## s.e. 1.2185
##
## sigma^2 = 442.5: log likelihood = -1325.65
## AIC=2655.29 AICc=2655.33 BIC=2662.68
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
#forecasting arima
forecast_arima <- forecast(
model_arima,
h = length(test)
)
plot(
forecast_arima,
main = "Forecasting ARIMA"
)

#evaluasi model arima
akurasi_arima <- accuracy(
forecast_arima,
test
)
akurasi_arima
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
## Test set 2.965691e+00 16.50476 10.53452 -Inf Inf 0.6400506 NA
#pemodelan sarimas
model_sarima <- auto.arima(
train,
seasonal = TRUE
)
summary(model_sarima)
## Series: train
## ARIMA(0,0,0) with non-zero mean
##
## Coefficients:
## mean
## 11.5925
## s.e. 1.2185
##
## sigma^2 = 442.5: log likelihood = -1325.65
## AIC=2655.29 AICc=2655.33 BIC=2662.68
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
#forecasting sarima
forecast_sarima <- forecast(
model_sarima,
h = length(test)
)
plot(
forecast_sarima,
main = "Forecasting SARIMA"
)

#evaluasi model sarima
akurasi_sarima <- accuracy(
forecast_sarima,
test
)
akurasi_sarima
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
## Test set 2.965691e+00 16.50476 10.53452 -Inf Inf 0.6400506 NA
#perbandingan model
cat("\nAkurasi Model ARIMA\n")
##
## Akurasi Model ARIMA
print(akurasi_arima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
## Test set 2.965691e+00 16.50476 10.53452 -Inf Inf 0.6400506 NA
cat("\nAkurasi Model SARIMA\n")
##
## Akurasi Model SARIMA
print(akurasi_sarima)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf Inf 0.7832408 -0.0008396416
## Test set 2.965691e+00 16.50476 10.53452 -Inf Inf 0.6400506 NA
#forecast data depan
forecast_30hari <- forecast(
model_sarima,
h = 30
)
plot(
forecast_30hari,
main = "Forecast Curah Hujan 30 Hari Kedepan",
ylab = "Curah Hujan (mm)",
xlab = "Waktu"
)

forecast_30hari
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 298 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 299 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 300 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 301 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 302 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 303 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 304 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 305 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 306 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 307 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 308 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 309 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 310 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 311 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 312 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 313 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 314 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 315 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 316 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 317 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 318 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 319 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 320 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 321 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 322 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 323 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 324 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 325 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 326 11.59253 -15.36522 38.55027 -29.63578 52.82083
## 327 11.59253 -15.36522 38.55027 -29.63578 52.82083