library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.2
library(zoo)
## Warning: package 'zoo' was built under R version 4.5.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
data_raw <- read_excel(
  "C:/Users/ASUS/Downloads/Topik Dalam Statistika/laporan_iklim_harian-Kobar.xlsx",
  skip = 7
)

head(data_raw)
## # A tibble: 6 × 2
##   TANGGAL       RR
##   <chr>      <dbl>
## 1 01-01-2025   0  
## 2 02-01-2025   3.5
## 3 03-01-2025   2.6
## 4 04-01-2025   0  
## 5 05-01-2025  15.4
## 6 06-01-2025   0
str(data_raw)
## tibble [372 × 2] (S3: tbl_df/tbl/data.frame)
##  $ TANGGAL: chr [1:372] "01-01-2025" "02-01-2025" "03-01-2025" "04-01-2025" ...
##  $ RR     : num [1:372] 0 3.5 2.6 0 15.4 ...
summary(data_raw)
##    TANGGAL                RR        
##  Length:372         Min.   :   0.0  
##  Class :character   1st Qu.:   0.0  
##  Mode  :character   Median :   4.5  
##                     Mean   : 741.7  
##                     3rd Qu.:  21.0  
##                     Max.   :8888.0  
##                     NA's   :7
#pembersihan data
# hapus kolom kosong

data_raw <- data_raw[, 1:2]

# mengubah nama kolom

colnames(data_raw) <- c("Tanggal", "Curah_Hujan")

# mengubah tanggal menjadi format Date

data_raw$Tanggal <- as.Date(
  data_raw$Tanggal,
  format = "%d-%m-%Y"
)

# mengubah data curah hujan menjadi numerik

data_raw$Curah_Hujan <- as.numeric(data_raw$Curah_Hujan)

# mengecek jumlah data 8888 dan 9999

sum(data_raw$Curah_Hujan == 8888, na.rm = TRUE)
## [1] 30
sum(data_raw$Curah_Hujan == 9999, na.rm = TRUE)
## [1] 0
# mengganti nilai 8888 dan 9999 menjadi NA

data_raw$Curah_Hujan[
  data_raw$Curah_Hujan %in% c(8888, 9999)
] <- NA

# menghitung rata-rata tanpa NA

rata_rata <- mean(
  data_raw$Curah_Hujan,
  na.rm = TRUE
)

rata_rata
## [1] 12.19045
# mengganti NA dengan rata-rata

data_raw$Curah_Hujan[
  is.na(data_raw$Curah_Hujan)
] <- rata_rata

# mengurutkan data berdasarkan tanggal

data_raw <- data_raw[
  order(data_raw$Tanggal),
]

# mengecek missing value

sum(is.na(data_raw$Curah_Hujan))
## [1] 0
head(data_raw)
## # A tibble: 6 × 2
##   Tanggal    Curah_Hujan
##   <date>           <dbl>
## 1 2025-01-01         0  
## 2 2025-01-02         3.5
## 3 2025-01-03         2.6
## 4 2025-01-04         0  
## 5 2025-01-05        15.4
## 6 2025-01-06         0
#buat time series dan uji stasioneritas
ts_data <- ts(data_raw$Curah_Hujan)

ts.plot(
  ts_data,
  main = "Plot Data Curah Hujan",
  ylab = "Curah Hujan (mm)",
  xlab = "Waktu",
  col = "blue"
)

adf_awal <- adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
adf_awal
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -5.9556, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#differencing
diff1 <- diff(ts_data)

ts.plot(
  diff1,
  main = "Differencing Orde 1",
  ylab = "Differencing",
  xlab = "Waktu",
  col = "red"
)

adf_diff <- adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
adf_diff
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -10.832, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
#plot acf dan pacf
acf(
  diff1,
  main = "Plot ACF"
)

pacf(
  diff1,
  main = "Plot PACF"
)

#splitting data
# Jumlah total data
n <- length(ts_data)

# Menentukan ukuran data training (80%)
train_size <- floor(0.8 * n)

# Membagi data
train <- ts_data[1:train_size]

test <- ts_data[(train_size + 1):n]

# Melihat jumlah data
length(train)
## [1] 297
length(test)
## [1] 75
# Persentase data
cat("Jumlah Data Training :", length(train), "\n")
## Jumlah Data Training : 297
cat("Jumlah Data Testing  :", length(test), "\n")
## Jumlah Data Testing  : 75
cat("Persentase Training :", 
    round(length(train)/n * 100, 2), "%\n")
## Persentase Training : 79.84 %
cat("Persentase Testing  :", 
    round(length(test)/n * 100, 2), "%\n")
## Persentase Testing  : 20.16 %
#pemodelan arima
model_arima <- auto.arima(
  train,
  seasonal = FALSE
)

summary(model_arima)
## Series: train 
## ARIMA(0,0,0) with non-zero mean 
## 
## Coefficients:
##          mean
##       11.5925
## s.e.   1.2185
## 
## sigma^2 = 442.5:  log likelihood = -1325.65
## AIC=2655.29   AICc=2655.33   BIC=2662.68
## 
## Training set error measures:
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
#forecasting arima
forecast_arima <- forecast(
  model_arima,
  h = length(test)
)

plot(
  forecast_arima,
  main = "Forecasting ARIMA"
)

#evaluasi model arima
akurasi_arima <- accuracy(
  forecast_arima,
  test
)

akurasi_arima
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
## Test set     2.965691e+00 16.50476 10.53452 -Inf  Inf 0.6400506            NA
#pemodelan sarimas
model_sarima <- auto.arima(
  train,
  seasonal = TRUE
)

summary(model_sarima)
## Series: train 
## ARIMA(0,0,0) with non-zero mean 
## 
## Coefficients:
##          mean
##       11.5925
## s.e.   1.2185
## 
## sigma^2 = 442.5:  log likelihood = -1325.65
## AIC=2655.29   AICc=2655.33   BIC=2662.68
## 
## Training set error measures:
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
#forecasting sarima
forecast_sarima <- forecast(
  model_sarima,
  h = length(test)
)

plot(
  forecast_sarima,
  main = "Forecasting SARIMA"
)

#evaluasi model sarima
akurasi_sarima <- accuracy(
  forecast_sarima,
  test
)

akurasi_sarima
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
## Test set     2.965691e+00 16.50476 10.53452 -Inf  Inf 0.6400506            NA
#perbandingan model
cat("\nAkurasi Model ARIMA\n")
## 
## Akurasi Model ARIMA
print(akurasi_arima)
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
## Test set     2.965691e+00 16.50476 10.53452 -Inf  Inf 0.6400506            NA
cat("\nAkurasi Model SARIMA\n")
## 
## Akurasi Model SARIMA
print(akurasi_sarima)
##                        ME     RMSE      MAE  MPE MAPE      MASE          ACF1
## Training set 8.669113e-12 20.99979 12.89127 -Inf  Inf 0.7832408 -0.0008396416
## Test set     2.965691e+00 16.50476 10.53452 -Inf  Inf 0.6400506            NA
#forecast data depan
forecast_30hari <- forecast(
  model_sarima,
  h = 30
)

plot(
  forecast_30hari,
  main = "Forecast Curah Hujan 30 Hari Kedepan",
  ylab = "Curah Hujan (mm)",
  xlab = "Waktu"
)

forecast_30hari
##     Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 298       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 299       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 300       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 301       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 302       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 303       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 304       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 305       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 306       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 307       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 308       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 309       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 310       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 311       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 312       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 313       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 314       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 315       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 316       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 317       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 318       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 319       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 320       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 321       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 322       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 323       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 324       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 325       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 326       11.59253 -15.36522 38.55027 -29.63578 52.82083
## 327       11.59253 -15.36522 38.55027 -29.63578 52.82083