Data yang digunakan adalah data tinggi muka air (TMA) Laut Marina Ancol per jam dalam periode 1 Januari 2021 00:00 WIB - 1 Juni 2023 00:00 WIB.
Akan dilakukan prediksi tinggi muka air (TMA) Laut Marina Ancol dalam 6 jam ke depan dari data terakhir. Yaitu Prediksi Laut TMA Marina Ancol 1 Juni 2023 01:00 - 1 Juni 2023 06:00 WIB.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
library(stats)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
# set theme for ggplot2 plots
theme_set(theme_bw())
# Read the CSV file "marina_complete.csv" into a data frame
marina_complete <- read.csv("D:/Kuliah/Mat/TSA Kominfo/Praktikum/5. marina_complete.csv")
head(marina_complete)
## ds y
## 1 2021-01-01T00:00:00Z 170
## 2 2021-01-01T01:00:00Z 161
## 3 2021-01-01T02:00:00Z 172
## 4 2021-01-01T03:00:00Z 177
## 5 2021-01-01T04:00:00Z 181
## 6 2021-01-01T05:00:00Z 188
marina_complete$ds <- ymd_hms(marina_complete$ds)
# Filter the data for training set(October to December 2022)
marina_train <- marina_complete %>%
filter(year(ds) == 2022 & month(ds) %in% 10:12)
head(marina_train)
## ds y
## 1 2022-10-01 00:00:00 167
## 2 2022-10-01 01:00:00 168
## 3 2022-10-01 02:00:00 168
## 4 2022-10-01 03:00:00 171
## 5 2022-10-01 04:00:00 178
## 6 2022-10-01 05:00:00 181
# Filter the data for test set (January 2023, first 10 days)
marina_test <- marina_complete %>%
filter(year(ds) == 2023 & month(ds) == 1 & day(ds) %in% 1:10)
head(marina_test)
## ds y
## 1 2023-01-01 00:00:00 159
## 2 2023-01-01 01:00:00 163
## 3 2023-01-01 02:00:00 176
## 4 2023-01-01 03:00:00 187
## 5 2023-01-01 04:00:00 194
## 6 2023-01-01 05:00:00 199
marina_train %>%
#filter(between(x = ds,
# left = ymd_hm("2022-10-01 00:00"),
# right = ymd_hm("2022-10-15 00:00"))) %>%
ggplot(aes(x = ds, y = y)) +
geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
marina_train %>%
filter(between(x = ds,
left = ymd_hm("2022-10-01 00:00"),
right = ymd_hm("2022-10-15 00:00"))) %>%
ggplot(aes(x = ds, y = y)) +
geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
marina_train %>%
filter(between(x = ds,
left = ymd_hm("2022-10-01 00:00"),
right = ymd_hm("2022-10-01 23:00"))) %>%
ggplot(aes(x = ds, y = y)) +
geom_line() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Createa time series object with a frequency of 24
ts_marina <- ts(data = marina_train$y, frequency = 24)
# decompose the time series into its components(trend, seasonal, residual)
datacomponents = decompose(ts_marina, type = "additive")
plot(datacomponents)
# Create various plots to analyze the time series
plot.ts(ts_marina)
seasonplot(x = ts_marina,
s = 24,
col = rainbow(18))
monthplot(ts_marina)
boxplot(ts_marina ~ cycle(ts_marina))
# Compute the autocorrelation function (ACF) of the time series (Non-Season)
acf0 <- ts_marina %>%
acf(lag.max = 96)
plot(acf0, main = "ACF", xaxt = "n")
axis(1, at = 0:96/24, labels = 0:96)
# Extract and Plot significant lags from ACF (Seasonal)
acf0$lag <- acf0$lag*24
acf0.1 <- as.data.frame(cbind(acf0$acf, acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2 %% 24 == 0), ]
barplot(height = acf0.2$V1,
names.argr = acf0.2$V2,
ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter
# Perform second-order diffrencing on the time series
diff2.marina <- ts_marina %>%
diff() %>%
diff()
plot(diff2.marina, main = "Time series plot of Marina d = 2")
# Compute the ACF of the seond-order diffrenced time series (Non Seasonal)
acf1 <- diff2.marina %>%
acf(lag.max = 96)
plot(acf1, max = "n", main = "ACF d2")
axis(1,at = 0:96/24, labels = 0:96)
# Extract and plot significant lags from ACF d2 time series (Seasonal)
acf1$lag <- acf1$lag*24
acf1.1 <- as.data.frame(cbind(acf1$acf, acf1$lag))
acf1.2 <- acf0.1[which(acf1.1$V2 %% 24 == 0), ]
barplot(height = acf1.2$V1,
names.argr = acf1.2$V2,
ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter
# Perform seasonal differencing on the time series
diff24.marina <- ts_marina %>%
diff(lag = 24)
plot(diff24.marina, main = "Time series plot of marina D = 1", xaxt = "n")
axis(1, at = 0:96, labels = 0:96)
# Compute the ACF of the Seasonality differenced time series
acf2 <- acf(diff24.marina, lag.max = 96, xaxt = "n", main = "ACF d = 1")
acf2$lag <- acf0$lag*24
acf2.1 <- as.data.frame(cbind(acf2$acf, acf2$lag))
acf2.2 <- acf0.1[which(acf2.1$V2 %% 24 == 0), ]
barplot(height = acf2.2$V1,
names.argr = acf2.2$V2,
ylab = "ACF", xlab = "Lag")
## Warning in plot.window(xlim, ylim, log = log, ...): "names.argr" is not a
## graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "names.argr" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "names.argr" is
## not a graphical parameter
# Perform d2D1 time series
diff24.marina <- diff24.marina %>%
diff() %>%
diff()
# Plot the time series display for the d2D1 data
diff24.marina %>%
ggtsdisplay(lag.max = 100)
# Fit SARIMA model (0, 2, 2)(3, 1, 1)[24] to the original time series
model4 <- ts_marina %>%
Arima(order = c(0, 2, 2),
seasonal = list(order = c(3, 1, 1), period = 24))
# Summarize SARIMA model 4
model4 %>%
summary()
## Series: .
## ARIMA(0,2,2)(3,1,1)[24]
##
## Coefficients:
## ma1 ma2 sar1 sar2 sar3 sma1
## -1.1325 0.1325 0.4500 0.205 -0.1005 -0.9642
## s.e. 0.0236 0.0236 0.0264 0.024 0.0244 0.0075
##
## sigma^2 = 38.28: log likelihood = -7096.06
## AIC=14206.12 AICc=14206.18 BIC=14245.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01831058 6.141925 4.3994 -0.08567746 2.419765 0.519676
## ACF1
## Training set -0.01237674
model4 %>%
AIC()
## [1] 14206.12
checkresiduals(model4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)(3,1,1)[24]
## Q* = 1049.2, df = 42, p-value < 2.2e-16
##
## Model df: 6. Total lags used: 48
# Overfit the mdoel by adding an additional AR/MA parameter
model5 <- ts_marina %>%
Arima(order = c(0, 2, 3),
seasonal = list(order = c(3, 1, 1), period = 24))
model5 %>%
summary()
## Series: .
## ARIMA(0,2,3)(3,1,1)[24]
##
## Coefficients:
## ma1 ma2 ma3 sar1 sar2 sar3 sma1
## -1.1386 0.3005 -0.1619 0.3992 0.1908 -0.0764 -0.9653
## s.e. 0.0261 0.0349 0.0212 0.0269 0.0239 0.0240 0.0075
##
## sigma^2 = 37.32: log likelihood = -7069.26
## AIC=14154.53 AICc=14154.59 BIC=14200.03
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006189991 6.063398 4.358216 -0.06132103 2.395641 0.5148112
## ACF1
## Training set 0.02062256
checkresiduals(model5)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,3)(3,1,1)[24]
## Q* = 881.67, df = 41, p-value < 2.2e-16
##
## Model df: 7. Total lags used: 48
model6 <- ts_marina %>%
Arima(order = c(0, 2, 2),
seasonal = list(order = c(2, 1, 1), period = 24))
model6 %>%
summary()
## Series: .
## ARIMA(0,2,2)(2,1,1)[24]
##
## Coefficients:
## ma1 ma2 sar1 sar2 sma1
## -1.0994 0.0994 0.4223 0.1703 -0.9705
## s.e. 0.0216 0.0215 0.0255 0.0226 0.0076
##
## sigma^2 = 38.56: log likelihood = -7104.46
## AIC=14220.93 AICc=14220.96 BIC=14255.05
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02302981 6.166003 4.429657 -0.0942282 2.433447 0.5232501
## ACF1
## Training set -0.009071086
checkresiduals(model6)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,2)(2,1,1)[24]
## Q* = 1193.3, df = 43, p-value < 2.2e-16
##
## Model df: 5. Total lags used: 48
# Make forecasts using the SARIMA model 4
library(magrittr)
ramalan <- model4 %>%
forecast(240) # 10 days x 24 hour
# Calculate accuracy of the forecast
accuracy(ramalan, marina_test$y[1:240])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01831058 6.141925 4.39940 -0.08567746 2.419765 0.6004995
## Test set -3.91376521 16.568784 13.23653 -3.42506942 7.947763 1.8067303
## ACF1
## Training set -0.01237674
## Test set NA
# Plot the forecast
plot(ramalan)
ramalan <- model5 %>%
forecast(240) # 10 days x 24 hour
accuracy(ramalan, marina_test$y[1:240])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006189991 6.063398 4.358216 -0.06132103 2.395641 0.5948781
## Test set -0.970717075 16.591236 13.614974 -1.73664725 7.966468 1.8583863
## ACF1
## Training set 0.02062256
## Test set NA
plot(ramalan)
# Find the maximum value in the original time series
marina_complete$ds %>%
max() %>%
format("%y-%m-%d %H:%M:%S")
## [1] "23-06-01 00:00:00"
# Extract the last 92 days of data from marina_complete
marina_model <- marina_complete %>%
tail(92*24)
marina_model %>%
summary()
## ds y
## Min. :2023-03-01 01:00:00 Min. :128.0
## 1st Qu.:2023-03-24 00:45:00 1st Qu.:175.0
## Median :2023-04-16 00:30:00 Median :193.0
## Mean :2023-04-16 00:30:00 Mean :192.7
## 3rd Qu.:2023-05-09 00:15:00 3rd Qu.:211.0
## Max. :2023-06-01 00:00:00 Max. :263.0
# Create a time series object from marina_model
marina_ts <- ts(marina_model$y, frequency = 24)
# Fit a final ARIMA Model (0,2,2)(3,1,1) to the marina_model time series
final_model <- marina_ts %>%
Arima(order = c(0,2,2),
seasonal = list(order = c(3,1,1), period = 24))
# Make forecasts for the next 6 periods using the final model
ramalan_next <- final_model %>%
forecast(6) # 6 hours
plot(ramalan_next)
# Convert Forecast as dataframe
ramalan_next %>%
as.data.frame() %>%
round()
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 93.00000 195 187 202 183 207
## 93.04167 189 179 200 174 205
## 93.08333 186 173 198 167 205
## 93.12500 182 168 196 161 204
## 93.16667 181 165 197 157 205
## 93.20833 183 166 200 157 209
# Fit an ARIMA model to the original time series
model_arima <- Arima(ts_marina, order = c(0, 2, 2), seasonal = list(order = c(3, 1, 1), period = 24))
# Summarize the ARIMA model
summary(model_arima)
## Series: ts_marina
## ARIMA(0,2,2)(3,1,1)[24]
##
## Coefficients:
## ma1 ma2 sar1 sar2 sar3 sma1
## -1.1325 0.1325 0.4500 0.205 -0.1005 -0.9642
## s.e. 0.0236 0.0236 0.0264 0.024 0.0244 0.0075
##
## sigma^2 = 38.28: log likelihood = -7096.06
## AIC=14206.12 AICc=14206.18 BIC=14245.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01831058 6.141925 4.3994 -0.08567746 2.419765 0.519676
## ACF1
## Training set -0.01237674
# Calculate the AIC for the ARIMA model
AIC(model_arima)
## [1] 14206.12
# Make forecasts using the ARIMA model
forecast_result <- forecast(model_arima)
# View the forecasted values
print(forecast_result)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 93.00000 154.9952 147.06269 162.9277 142.86346 167.1270
## 93.04167 160.5077 150.00191 171.0136 144.44047 176.5750
## 93.08333 169.8297 157.26385 182.3955 150.61190 189.0475
## 93.12500 172.0644 157.72881 186.4000 150.14000 193.9888
## 93.16667 175.7212 159.80899 191.6335 151.38556 200.0569
## 93.20833 186.7803 169.43184 204.1288 160.24810 213.3126
## 93.25000 187.9304 169.25360 206.6072 159.36671 216.4940
## 93.29167 201.6827 181.76404 221.6014 171.21972 232.1458
## 93.33333 203.4464 182.35682 224.5359 171.19270 235.7000
## 93.37500 212.4564 190.25586 234.6569 178.50362 246.4091
## 93.41667 210.0402 186.77995 233.3004 174.46673 245.6136
## 93.45833 210.3247 186.04933 234.6001 173.19871 247.4507
## 93.50000 199.7770 174.52563 225.0284 161.15836 238.3956
## 93.54167 195.0894 168.89682 221.2819 155.03132 235.1474
## 93.58333 188.2988 161.19625 215.4014 146.84902 229.7486
## 93.62500 185.7636 157.77915 213.7480 142.96509 228.5621
## 93.66667 179.4875 150.64678 208.3283 135.37940 223.5957
## 93.70833 171.7436 142.06981 201.4173 126.36148 217.1256
## 93.75000 166.5824 136.09715 197.0677 119.95920 213.2057
## 93.79167 167.0720 135.79490 198.3491 119.23780 214.9062
## 93.83333 161.1600 129.10937 193.2106 112.14280 210.1771
## 93.87500 156.8265 124.01936 189.6336 106.65232 207.0006
## 93.91667 157.3333 123.78548 190.8810 106.02637 208.6401
## 93.95833 154.6741 120.40052 188.9477 102.25717 207.0911
## 94.00000 155.1701 119.21210 191.1281 100.17707 210.1632
## 94.04167 160.2352 122.81034 197.6601 102.99879 217.4717
## 94.08333 169.5632 130.72447 208.4019 110.16449 228.9619
## 94.12500 172.1240 131.91896 212.3291 110.63567 233.6124
## 94.16667 177.6883 136.15958 219.2169 114.17563 241.2009
## 94.20833 189.3892 146.57575 232.2026 123.91168 254.8667
## 94.25000 192.8165 148.75370 236.8793 125.42826 260.2048
## 94.29167 205.4112 160.13147 250.6909 136.16185 274.6605
## 94.33333 208.2776 161.81100 254.7443 137.21304 279.3422
## 94.37500 213.9143 166.28839 261.5402 141.07676 286.7518
## 94.41667 214.0493 165.28990 262.8088 139.47821 288.6205
## 94.45833 213.5054 163.63642 263.3745 137.23736 289.7735
## 94.50000 204.5923 153.63607 255.5485 126.66149 282.5230
## 94.54167 199.8054 147.78300 251.8278 120.24402 279.3667
## 94.58333 192.5070 139.43815 245.5758 111.34521 273.6688
## 94.62500 188.0795 133.98280 242.1762 105.34573 270.8133
## 94.66667 180.2200 125.11298 235.3270 95.94108 264.4989
## 94.70833 173.3844 117.28363 229.4852 87.58568 259.1831
## 94.75000 167.2421 110.16333 224.3209 79.94766 254.5365
## 94.79167 166.0713 108.02944 224.1131 77.30395 254.8386
## 94.83333 160.7613 101.77062 219.7520 70.54283 250.9798
## 94.87500 156.3578 96.43174 216.2838 64.70881 248.0067
## 94.91667 156.7073 95.85886 217.5558 63.64763 249.7670
## 94.95833 152.2535 90.49493 214.0121 57.80192 246.7051