Data yang digunakan adalah data tinggi muka air (TMA) Laut Marina Ancol per jam dalam periode 1 Januari 2021 00:00 WIB - 1 Juni 2023 00:00 WIB.

Akan dilakukan prediksi tinggi muka air (TMA) Laut Marina Ancol dalam 6 jam ke depan dari data terakhir. Yaitu Prediksi Laut TMA Marina Ancol 1 Juni 2023 01:00 - 1 Juni 2023 06:00 WIB.

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
library(stats)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
# set theme for ggplot2 plots
theme_set(theme_bw())

1 Data Eksplorasi

# Read the CSV file "marina_complete.csv" into a data frame
marina_complete <- read.csv("D:/Kuliah/Mat/TSA Kominfo/Praktikum/5. marina_complete.csv")
head(marina_complete)
##                     ds   y
## 1 2021-01-01T00:00:00Z 170
## 2 2021-01-01T01:00:00Z 161
## 3 2021-01-01T02:00:00Z 172
## 4 2021-01-01T03:00:00Z 177
## 5 2021-01-01T04:00:00Z 181
## 6 2021-01-01T05:00:00Z 188
marina_complete$ds <- ymd_hms(marina_complete$ds)
# Filter the data for training set(October to December 2022)
marina_train <- marina_complete %>%
  filter(year(ds) == 2022 & month(ds) %in% 10:12)
head(marina_train)
##                    ds   y
## 1 2022-10-01 00:00:00 167
## 2 2022-10-01 01:00:00 168
## 3 2022-10-01 02:00:00 168
## 4 2022-10-01 03:00:00 171
## 5 2022-10-01 04:00:00 178
## 6 2022-10-01 05:00:00 181
# Filter the data for test set (January 2023, first 10 days)
marina_test <- marina_complete %>% 
  filter(year(ds) == 2023 & month(ds) == 1 & day(ds) %in% 1:10)
head(marina_test)
##                    ds   y
## 1 2023-01-01 00:00:00 159
## 2 2023-01-01 01:00:00 163
## 3 2023-01-01 02:00:00 176
## 4 2023-01-01 03:00:00 187
## 5 2023-01-01 04:00:00 194
## 6 2023-01-01 05:00:00 199
marina_train %>%
  #filter(between(x = ds,
  #              left = ymd_hm("2022-10-01 00:00"),
  #              right = ymd_hm("2022-10-15 00:00"))) %>%
  ggplot(aes(x = ds, y = y)) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

marina_train %>%
  filter(between(x = ds,
                left = ymd_hm("2022-10-01 00:00"),
                right = ymd_hm("2022-10-15 00:00"))) %>%
  ggplot(aes(x = ds, y = y)) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

marina_train %>%
  filter(between(x = ds,
                left = ymd_hm("2022-10-01 00:00"),
                right = ymd_hm("2022-10-01 23:00"))) %>%
  ggplot(aes(x = ds, y = y)) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Createa time series object with a frequency of 24
ts_marina <- ts(data = marina_train$y, frequency = 24)
# decompose the time series into its components(trend, seasonal, residual)
datacomponents = decompose(ts_marina, type = "additive")
plot(datacomponents)

# Create various plots to analyze the time series
plot.ts(ts_marina)

seasonplot(x = ts_marina,
           s = 24,
           col = rainbow(18))

monthplot(ts_marina)

boxplot(ts_marina ~ cycle(ts_marina))

2 Identifikasi Model

# Compute the autocorrelation function (ACF) of the time series (Non-Season)
acf0 <- ts_marina %>%
  acf(lag.max = 96)

plot(acf0, main = "ACF", xaxt = "n")
axis(1, at = 0:96/24, labels = 0:96)

# Extract and Plot significant lags from ACF (Seasonal)
acf0$lag <- acf0$lag*24
acf0.1 <- as.data.frame(cbind(acf0$acf, acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2 %% 24 == 0), ]
barplot(height = acf0.2$V1,
        names.argr = acf0.2$V2,
        ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter

# Perform second-order diffrencing on the time series
diff2.marina <- ts_marina %>%
  diff() %>%
  diff()
plot(diff2.marina, main = "Time series plot of Marina d = 2")

# Compute the ACF of the seond-order diffrenced time series (Non Seasonal)
acf1 <- diff2.marina %>%
  acf(lag.max = 96)

plot(acf1, max = "n", main = "ACF d2")
axis(1,at = 0:96/24, labels = 0:96)

# Extract and plot significant lags from ACF d2 time series (Seasonal)
acf1$lag <- acf1$lag*24
acf1.1 <- as.data.frame(cbind(acf1$acf, acf1$lag))
acf1.2 <- acf0.1[which(acf1.1$V2 %% 24 == 0), ]
barplot(height = acf1.2$V1,
        names.argr = acf1.2$V2,
        ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter

# Perform seasonal differencing on the time series
diff24.marina <- ts_marina %>%
  diff(lag = 24)
plot(diff24.marina, main = "Time series plot of marina D = 1", xaxt = "n")
axis(1, at = 0:96, labels = 0:96)

# Compute the ACF of the Seasonality differenced time series
acf2 <- acf(diff24.marina, lag.max = 96, xaxt = "n", main = "ACF d = 1")

acf2$lag <- acf0$lag*24
acf2.1 <- as.data.frame(cbind(acf2$acf, acf2$lag))
acf2.2 <- acf0.1[which(acf2.1$V2 %% 24 == 0), ]
barplot(height = acf2.2$V1,
        names.argr = acf2.2$V2,
        ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter

# Perform d2D1 time series
diff24.marina <- diff24.marina %>%
  diff() %>%
  diff()
# Plot the time series display for the d2D1 data
diff24.marina %>%
  ggtsdisplay(lag.max = 100)

3 SARIMA

# Fit SARIMA model (0, 2, 2)(3, 1, 1)[24] to the original time series
model4 <- ts_marina %>%
  Arima(order = c(0, 2, 2),
        seasonal = list(order = c(3, 1, 1), period = 24))
# Summarize SARIMA model 4
model4 %>%
  summary()
## Series: . 
## ARIMA(0,2,2)(3,1,1)[24] 
## 
## Coefficients:
##           ma1     ma2    sar1   sar2     sar3     sma1
##       -1.1325  0.1325  0.4500  0.205  -0.1005  -0.9642
## s.e.   0.0236  0.0236  0.0264  0.024   0.0244   0.0075
## 
## sigma^2 = 38.28:  log likelihood = -7096.06
## AIC=14206.12   AICc=14206.18   BIC=14245.94
## 
## Training set error measures:
##                       ME     RMSE    MAE         MPE     MAPE     MASE
## Training set -0.01831058 6.141925 4.3994 -0.08567746 2.419765 0.519676
##                     ACF1
## Training set -0.01237674
model4 %>%
  AIC()
## [1] 14206.12
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,2)(3,1,1)[24]
## Q* = 1049.2, df = 42, p-value < 2.2e-16
## 
## Model df: 6.   Total lags used: 48

4 Overfitting

# Overfit the mdoel by adding an additional AR/MA parameter
model5 <- ts_marina %>%
  Arima(order = c(0, 2, 3),
        seasonal = list(order = c(3, 1, 1), period = 24))
model5 %>%
  summary()
## Series: . 
## ARIMA(0,2,3)(3,1,1)[24] 
## 
## Coefficients:
##           ma1     ma2      ma3    sar1    sar2     sar3     sma1
##       -1.1386  0.3005  -0.1619  0.3992  0.1908  -0.0764  -0.9653
## s.e.   0.0261  0.0349   0.0212  0.0269  0.0239   0.0240   0.0075
## 
## sigma^2 = 37.32:  log likelihood = -7069.26
## AIC=14154.53   AICc=14154.59   BIC=14200.03
## 
## Training set error measures:
##                        ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.006189991 6.063398 4.358216 -0.06132103 2.395641 0.5148112
##                    ACF1
## Training set 0.02062256
checkresiduals(model5)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,3)(3,1,1)[24]
## Q* = 881.67, df = 41, p-value < 2.2e-16
## 
## Model df: 7.   Total lags used: 48
model6 <- ts_marina %>%
  Arima(order = c(0, 2, 2),
        seasonal = list(order = c(2, 1, 1), period = 24))
model6 %>%
  summary()
## Series: . 
## ARIMA(0,2,2)(2,1,1)[24] 
## 
## Coefficients:
##           ma1     ma2    sar1    sar2     sma1
##       -1.0994  0.0994  0.4223  0.1703  -0.9705
## s.e.   0.0216  0.0215  0.0255  0.0226   0.0076
## 
## sigma^2 = 38.56:  log likelihood = -7104.46
## AIC=14220.93   AICc=14220.96   BIC=14255.05
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.02302981 6.166003 4.429657 -0.0942282 2.433447 0.5232501
##                      ACF1
## Training set -0.009071086
checkresiduals(model6)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,2)(2,1,1)[24]
## Q* = 1193.3, df = 43, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 48

5 Peramalan

# Make forecasts using the SARIMA model 4
library(magrittr)
ramalan <- model4 %>%
  forecast(240) # 10 days x 24 hour
# Calculate accuracy of the forecast
accuracy(ramalan, marina_test$y[1:240])
##                       ME      RMSE      MAE         MPE     MAPE      MASE
## Training set -0.01831058  6.141925  4.39940 -0.08567746 2.419765 0.6004995
## Test set     -3.91376521 16.568784 13.23653 -3.42506942 7.947763 1.8067303
##                     ACF1
## Training set -0.01237674
## Test set              NA
# Plot the forecast
plot(ramalan)

ramalan <- model5 %>%
  forecast(240) # 10 days x 24 hour
accuracy(ramalan, marina_test$y[1:240])
##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.006189991  6.063398  4.358216 -0.06132103 2.395641 0.5948781
## Test set     -0.970717075 16.591236 13.614974 -1.73664725 7.966468 1.8583863
##                    ACF1
## Training set 0.02062256
## Test set             NA
plot(ramalan)

# Find the maximum value in the original time series
marina_complete$ds %>%
  max() %>%
  format("%y-%m-%d %H:%M:%S")
## [1] "23-06-01 00:00:00"
# Extract the last 92 days of data from marina_complete
marina_model <- marina_complete %>%
  tail(92*24)
marina_model %>%
  summary()
##        ds                            y        
##  Min.   :2023-03-01 01:00:00   Min.   :128.0  
##  1st Qu.:2023-03-24 00:45:00   1st Qu.:175.0  
##  Median :2023-04-16 00:30:00   Median :193.0  
##  Mean   :2023-04-16 00:30:00   Mean   :192.7  
##  3rd Qu.:2023-05-09 00:15:00   3rd Qu.:211.0  
##  Max.   :2023-06-01 00:00:00   Max.   :263.0
# Create a time series object from marina_model
marina_ts <- ts(marina_model$y, frequency = 24)

6 Model Terbaik

# Fit a final ARIMA Model (0,2,2)(3,1,1) to the marina_model time series
final_model <- marina_ts %>%
  Arima(order = c(0,2,2),
        seasonal = list(order = c(3,1,1), period = 24))
# Make forecasts for the next 6 periods using the final model
ramalan_next <- final_model %>%
  forecast(6) # 6 hours
plot(ramalan_next)

# Convert Forecast as dataframe
ramalan_next %>%
  as.data.frame() %>%
  round()
##          Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 93.00000            195   187   202   183   207
## 93.04167            189   179   200   174   205
## 93.08333            186   173   198   167   205
## 93.12500            182   168   196   161   204
## 93.16667            181   165   197   157   205
## 93.20833            183   166   200   157   209
# Fit an ARIMA model to the original time series
model_arima <- Arima(ts_marina, order = c(0, 2, 2), seasonal = list(order = c(3, 1, 1), period = 24))

# Summarize the ARIMA model
summary(model_arima)
## Series: ts_marina 
## ARIMA(0,2,2)(3,1,1)[24] 
## 
## Coefficients:
##           ma1     ma2    sar1   sar2     sar3     sma1
##       -1.1325  0.1325  0.4500  0.205  -0.1005  -0.9642
## s.e.   0.0236  0.0236  0.0264  0.024   0.0244   0.0075
## 
## sigma^2 = 38.28:  log likelihood = -7096.06
## AIC=14206.12   AICc=14206.18   BIC=14245.94
## 
## Training set error measures:
##                       ME     RMSE    MAE         MPE     MAPE     MASE
## Training set -0.01831058 6.141925 4.3994 -0.08567746 2.419765 0.519676
##                     ACF1
## Training set -0.01237674
# Calculate the AIC for the ARIMA model
AIC(model_arima)
## [1] 14206.12
# Make forecasts using the ARIMA model
forecast_result <- forecast(model_arima)

# View the forecasted values
print(forecast_result)
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 93.00000       154.9952 147.06269 162.9277 142.86346 167.1270
## 93.04167       160.5077 150.00191 171.0136 144.44047 176.5750
## 93.08333       169.8297 157.26385 182.3955 150.61190 189.0475
## 93.12500       172.0644 157.72881 186.4000 150.14000 193.9888
## 93.16667       175.7212 159.80899 191.6335 151.38556 200.0569
## 93.20833       186.7803 169.43184 204.1288 160.24810 213.3126
## 93.25000       187.9304 169.25360 206.6072 159.36671 216.4940
## 93.29167       201.6827 181.76404 221.6014 171.21972 232.1458
## 93.33333       203.4464 182.35682 224.5359 171.19270 235.7000
## 93.37500       212.4564 190.25586 234.6569 178.50362 246.4091
## 93.41667       210.0402 186.77995 233.3004 174.46673 245.6136
## 93.45833       210.3247 186.04933 234.6001 173.19871 247.4507
## 93.50000       199.7770 174.52563 225.0284 161.15836 238.3956
## 93.54167       195.0894 168.89682 221.2819 155.03132 235.1474
## 93.58333       188.2988 161.19625 215.4014 146.84902 229.7486
## 93.62500       185.7636 157.77915 213.7480 142.96509 228.5621
## 93.66667       179.4875 150.64678 208.3283 135.37940 223.5957
## 93.70833       171.7436 142.06981 201.4173 126.36148 217.1256
## 93.75000       166.5824 136.09715 197.0677 119.95920 213.2057
## 93.79167       167.0720 135.79490 198.3491 119.23780 214.9062
## 93.83333       161.1600 129.10937 193.2106 112.14280 210.1771
## 93.87500       156.8265 124.01936 189.6336 106.65232 207.0006
## 93.91667       157.3333 123.78548 190.8810 106.02637 208.6401
## 93.95833       154.6741 120.40052 188.9477 102.25717 207.0911
## 94.00000       155.1701 119.21210 191.1281 100.17707 210.1632
## 94.04167       160.2352 122.81034 197.6601 102.99879 217.4717
## 94.08333       169.5632 130.72447 208.4019 110.16449 228.9619
## 94.12500       172.1240 131.91896 212.3291 110.63567 233.6124
## 94.16667       177.6883 136.15958 219.2169 114.17563 241.2009
## 94.20833       189.3892 146.57575 232.2026 123.91168 254.8667
## 94.25000       192.8165 148.75370 236.8793 125.42826 260.2048
## 94.29167       205.4112 160.13147 250.6909 136.16185 274.6605
## 94.33333       208.2776 161.81100 254.7443 137.21304 279.3422
## 94.37500       213.9143 166.28839 261.5402 141.07676 286.7518
## 94.41667       214.0493 165.28990 262.8088 139.47821 288.6205
## 94.45833       213.5054 163.63642 263.3745 137.23736 289.7735
## 94.50000       204.5923 153.63607 255.5485 126.66149 282.5230
## 94.54167       199.8054 147.78300 251.8278 120.24402 279.3667
## 94.58333       192.5070 139.43815 245.5758 111.34521 273.6688
## 94.62500       188.0795 133.98280 242.1762 105.34573 270.8133
## 94.66667       180.2200 125.11298 235.3270  95.94108 264.4989
## 94.70833       173.3844 117.28363 229.4852  87.58568 259.1831
## 94.75000       167.2421 110.16333 224.3209  79.94766 254.5365
## 94.79167       166.0713 108.02944 224.1131  77.30395 254.8386
## 94.83333       160.7613 101.77062 219.7520  70.54283 250.9798
## 94.87500       156.3578  96.43174 216.2838  64.70881 248.0067
## 94.91667       156.7073  95.85886 217.5558  63.64763 249.7670
## 94.95833       152.2535  90.49493 214.0121  57.80192 246.7051