library(forecast)
library(TSA)
library(tseries)
library(lubridate)
train <- read.csv("D:/Materi Kuliah/Semester 4/ADW/Tugas/SARIMA Model Forecasting/train.csv", header=TRUE, sep = ";")
train$ts <- ymd_hms(train$ts)
ts.plot(train$y)

# Data train tidak stasioner, maka dilakukan differencing(d=1)
yNS <- diff(train$y,lag=1)
adf.test(yNS)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yNS
## Dickey-Fuller = -9.4137, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
ts.plot(yNS)

par(mfrow=c(1,2))
Acf(yNS,lag.max = 1000)
Pacf(yNS, lag.max = 1000)

# Data train setelah differencing(d=1) tidak stasioner, maka dilakukan differencing(D=1) periode 144 karena diasumsikan pola musiman harian
yS <- diff(train$y,lag=144)
adf.test(yS)
## Warning in adf.test(yS): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yS
## Dickey-Fuller = -7.1112, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
ts.plot(yS)

par(mfrow=c(1,2))
Acf(yS,lag.max = 1000)
Pacf(yS, lag.max = 1000)

# Data train setelah differencing(D=144) tidak stasioner, maka dilakukan differencing(d=1) dan diff(D=1) periode 144
yNSS <- diff(diff(train$y,lag=144))
adf.test(yNSS)
## Warning in adf.test(yNSS): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  yNSS
## Dickey-Fuller = -17.249, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
ts.plot(yNSS)

# identifikasi parameter non musiman
par(mfrow=c(1,2))
Acf(yNSS, lag.max=150)
Pacf(yNSS)

# identifikasi parameter musiman
par(mfrow=c(1,2))
Acf(yNSS,lag.max = 1000)
Pacf(yNSS, lag.max = 1000)

# non musiman: ACF cutoff pada lag 2, PACF diesdown -> indikasi MA(2)
# musiman: ACF cutoff pada lag musiman=144, PACF diesdown pada lag kelipatan musiman=144,288,432,..
# model: SARIMA(0,1,2)(0,1,1)144
sarima_model <- Arima(train$y,
                         order=c(0,1,2),
                         seasonal=list(order=c(0,1,1),period=144)
                         )
summary(sarima_model)
## Series: train$y 
## ARIMA(0,1,2)(0,1,1)[144] 
## 
## Coefficients:
##           ma1      ma2     sma1
##       -0.4669  -0.1318  -0.9998
## s.e.   0.0167   0.0168   0.0216
## 
## sigma^2 = 78.76:  log likelihood = -12679.21
## AIC=25366.42   AICc=25366.43   BIC=25391.01
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.05263146 8.690181 6.400593 -0.5212368 5.307085 0.8603127
##                     ACF1
## Training set 0.002252446
sarima_forecast <- forecast(sarima_model, h=83)
autoplot(sarima_forecast)

checkresiduals(sarima_model) 

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2)(0,1,1)[144]
## Q* = 5.3216, df = 7, p-value = 0.6208
## 
## Model df: 3.   Total lags used: 10
Acf(residuals(sarima_model))  

forecast_df <- data.frame(Forecast = as.numeric(sarima_forecast$mean))
forecast_df
##     Forecast
## 1  141.91417
## 2  138.91257
## 3  134.27258
## 4  135.71256
## 5  135.47256
## 6  133.15255
## 7  132.55256
## 8  127.91257
## 9  125.15258
## 10 123.07260
## 11 123.11259
## 12 122.51260
## 13 124.31261
## 14 121.15260
## 15 119.59261
## 16 116.19263
## 17 116.67264
## 18 112.75264
## 19 111.63266
## 20 111.71268
## 21 109.11268
## 22 108.31269
## 23 111.31270
## 24 106.99272
## 25 105.67274
## 26 103.39275
## 27 104.95276
## 28 102.27278
## 29  94.63278
## 30  87.51280
## 31  85.95284
## 32  91.95282
## 33  88.51285
## 34  88.99287
## 35  89.03289
## 36  85.95290
## 37  88.75293
## 38  88.23294
## 39  87.47298
## 40  87.63297
## 41  83.79299
## 42  88.19299
## 43  90.39303
## 44  89.31305
## 45  93.03307
## 46  95.19307
## 47  99.03309
## 48 109.07312
## 49 112.07314
## 50 114.39317
## 51 113.75320
## 52 121.63322
## 53 126.51326
## 54 126.59328
## 55 131.51328
## 56 134.63331
## 57 135.27334
## 58 139.75337
## 59 150.95338
## 60 153.31342
## 61 160.11343
## 62 166.67346
## 63 168.95349
## 64 170.63354
## 65 172.43356
## 66 172.67360
## 67 176.55363
## 68 180.95367
## 69 182.83370
## 70 185.71373
## 71 186.43377
## 72 187.47380
## 73 188.43384
## 74 191.67389
## 75 193.39392
## 76 195.79396
## 77 196.31400
## 78 195.71405
## 79 196.23409
## 80 198.83413
## 81 198.91418
## 82 200.03423
## 83 200.07428
write.csv(forecast_df, "D:/Materi Kuliah/Semester 4/ADW/Tugas/SARIMA Model Forecasting/Forecast_Aditya075.csv", row.names = FALSE)