SARIMA Model

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

##################################################################################################
## Nama: JOICE OCRISA 
## NRP: 5003221066 
## Introduction to Time Series Analysis C
##################################################################################################

# Library -----------------------------------------------------------------------------------------
library(seastests)

## Warning: package 'seastests' was built under R version 4.3.3

library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

library(tseries)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.3.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Data --------------------------------------------------------------------------------------------
data <- read.csv("C:\\Users\\User\\Downloads\\data.csv")
head(data)

##                     ts rate
## 1 2020-02-01T00:00:00Z  100
## 2 2020-02-01T00:10:00Z   97
## 3 2020-02-01T00:20:00Z   96
## 4 2020-02-01T00:30:00Z  100
## 5 2020-02-01T00:40:00Z   89
## 6 2020-02-01T00:50:00Z   91

# Formatting Timestamp ----------------------------------------------------------------------------
data$ts <- as.POSIXct(data$ts, format="%Y-%m-%dT%H:%M:%OSZ", tz="UTC")
head(data)  # Memeriksa apakah konversi waktu berhasil

##                    ts rate
## 1 2020-02-01 00:00:00  100
## 2 2020-02-01 00:10:00   97
## 3 2020-02-01 00:20:00   96
## 4 2020-02-01 00:30:00  100
## 5 2020-02-01 00:40:00   89
## 6 2020-02-01 00:50:00   91

# Convert data into time series objects -----------------------------------------------------------
time_series <- ts(data$rate, frequency = 144) 

# Seasonality Check ------------------------------------------------------------------------------
isSeasonal(time_series)

## [1] TRUE

# Split the Data-----------------------------------------------------------------------------------
day26 <- ymd_hms("2020-02-26 00:00:00", tz = "UTC")
train_data <- data %>% filter(ts < day26)
toforc_data <- data %>% filter(ts >= day26)

# Plot of The Data --------------------------------------------------------------------------------
plot(train_data, type = 'l')

plot(toforc_data, type = 'l')

train_data$group <- "Train"
toforc_data$group <- "To Forecast"
all_data <- rbind(train_data, toforc_data)

ggplot(all_data, aes(x = ts, y = rate, color = group)) +
  geom_line() +
  labs(x = "TimeStamp", y = "Rate", title = "Combined Plot: Train and To Forecast Data") +
  theme_minimal() +
  scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
  scale_color_manual(values = c("Train" = "blue", "To Forecast" = "black")) +
  theme(legend.title = element_blank())

############ Stationary Data Test ##################################################################

# Box-Cox Transformation --------------------------------------------------------------------------
# for check if the data stationary in Varians
trans <- BoxCox.lambda(train_data$rate)
data_transformed1 <- BoxCox(train_data$rate, trans) # Do the transformation

trans2 <- BoxCox.lambda(data_transformed1)
data_transformed2 <- BoxCox(data_transformed1, trans2) # Do the transformation

BoxCox.lambda(data_transformed2)

## [1] 1.000513

# ACF and PACF Plot -------------------------------------------------------------------------------
# for check if the data stationary in Mean
par(mfrow = c(1, 2))
acf(data_transformed2, lag.max = 144, main = "ACF")
pacf(data_transformed2, lag.max = 144, main = "PACF")

# Seasonal Differencing Order 1 -------------------------------------------------------------------
seas_diff1 <- diff(data_transformed2, differences = 1, lag= 144)
adf.test(seas_diff1)

## Warning in adf.test(seas_diff1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  seas_diff1
## Dickey-Fuller = -7.125, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary

# Non-Seasonal Differencing Order 1 ---------------------------------------------------------------
nonseas_diff1 <- diff(data_transformed2, differences = 1)
adf.test(nonseas_diff1)

## Warning in adf.test(nonseas_diff1): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  nonseas_diff1
## Dickey-Fuller = -9.321, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary

# ACF and PACF Plot of Non Seasonal Data ----------------------------------------------------------
par(mfrow = c(1, 2))
acf(nonseas_diff1, lag.max = 144, main = "ACF After Differencing")
pacf(nonseas_diff1, lag.max = 144, main = "PACF After Differencing")

# Find (p, d, q) (P, D, Q) ------------------------------------------------------------------------ 
# PACF    p(AR) = 3     P(SAR) = 1
# Diff    d     = 1     D      = 1
# ACF     q(MA) = 2     Q(SMA) = 1

############ Model Estimations ####################################################################

# Generate multiple models with specified orders --------------------------------------------------
models <- list(
  model1 = Arima(train_data$rate, order = c(3, 1, 2), seasonal = list(order = c(1, 1, 1)), include.drift = FALSE),
  model2 = Arima(train_data$rate, order = c(3, 0, 2), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
  model3 = Arima(train_data$rate, order = c(4, 1, 2), seasonal = list(order = c(1, 1, 2)), include.drift = FALSE),
  model4 = Arima(train_data$rate, order = c(3, 1, 3), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
  model5 = Arima(train_data$rate, order = c(3, 0, 3), seasonal = list(order = c(1, 0, 1)), include.drift = FALSE),
  model6 = Arima(train_data$rate, order = c(2, 1, 2), seasonal = list(order = c(2, 1, 1)), include.drift = FALSE),
  model7 = Arima(train_data$rate, order = c(3, 0, 3), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
  model8 = Arima(train_data$rate, order = c(3, 1, 4), seasonal = list(order = c(2, 1, 1)), include.drift = FALSE),
  model9 = Arima(train_data$rate, order = c(5, 0, 3), seasonal = list(order = c(1, 1, 1)), include.drift = FALSE),
  model10 = Arima(train_data$rate, order = c(4, 1, 4), seasonal = list(order = c(2, 1, 2)), include.drift = FALSE)
)
# Evaluate models  ----------------------------------------------------------------------------------
model_comparison <- data.frame(
  Model = names(models),
  AIC = sapply(models, AIC)
)

# Print Model Comparison ----------------------------------------------------------------------------
print(model_comparison)

##           Model      AIC
## model1   model1 26427.33
## model2   model2 26348.09
## model3   model3 26431.44
## model4   model4 26427.26
## model5   model5 26211.69
## model6   model6 26427.57
## model7   model7 26349.75
## model8   model8 26433.20
## model9   model9 26362.45
## model10 model10 26433.07

# Select the Best Model -----------------------------------------------------------------------------
best_model_name <- model_comparison$Model[which.min(model_comparison$AIC)]
best_model <- models[[best_model_name]]

cat("Best Model:", best_model_name, "\n")

## Best Model: model5

############ Forecasting ############################################################################
# Forecast Using the Best Model ---------------------------------------------------------------------
forecast_sarima <- forecast(best_model, h = 144)

# Forecast Plot -------------------------------------------------------------------------------------
df <- data.frame(
  ts = seq(from = max(train_data$ts), by = "10 min", length.out = 144), 
  rate = forecast_sarima$mean,
  group = "Forecast"
)

train_data$group <- "Train"
toforc_data$group <- "To Forecast"
forecast_plot <- bind_rows(train_data, toforc_data, df)

ggplot(forecast_plot, aes(x = ts, y = rate, color = group)) +
  geom_line() +
  labs(x = "Date", y = "Rate", title = "Time Series Plot of Rate with Forecast SARIMA") +
  theme_minimal() +
  scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
  scale_color_manual(values = c("Train" = "blue", "To Forecast" = "black", "Forecast" = "red"))

# Ljung-Box Test for residuals-- ------------------------------------------------------------------
# White Noise Check
Box.test(residuals(forecast_sarima), lag = 144, type = "Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  residuals(forecast_sarima)
## X-squared = 272.26, df = 144, p-value = 5.938e-10

# Shapiro-Wilk Test for residual normality --------------------------------------------------------
shapiro.test(residuals(forecast_sarima))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(forecast_sarima)
## W = 0.98468, p-value < 2.2e-16

SARIMA Model

Joice Ocrisa

2024-11-16

R Markdown

Including Plots