This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
##################################################################################################
## Nama: JOICE OCRISA
## NRP: 5003221066
## Introduction to Time Series Analysis C
##################################################################################################
# Library -----------------------------------------------------------------------------------------
library(seastests)
## Warning: package 'seastests' was built under R version 4.3.3
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tseries)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Data --------------------------------------------------------------------------------------------
data <- read.csv("C:\\Users\\User\\Downloads\\data.csv")
head(data)
## ts rate
## 1 2020-02-01T00:00:00Z 100
## 2 2020-02-01T00:10:00Z 97
## 3 2020-02-01T00:20:00Z 96
## 4 2020-02-01T00:30:00Z 100
## 5 2020-02-01T00:40:00Z 89
## 6 2020-02-01T00:50:00Z 91
# Formatting Timestamp ----------------------------------------------------------------------------
data$ts <- as.POSIXct(data$ts, format="%Y-%m-%dT%H:%M:%OSZ", tz="UTC")
head(data) # Memeriksa apakah konversi waktu berhasil
## ts rate
## 1 2020-02-01 00:00:00 100
## 2 2020-02-01 00:10:00 97
## 3 2020-02-01 00:20:00 96
## 4 2020-02-01 00:30:00 100
## 5 2020-02-01 00:40:00 89
## 6 2020-02-01 00:50:00 91
# Convert data into time series objects -----------------------------------------------------------
time_series <- ts(data$rate, frequency = 144)
# Seasonality Check ------------------------------------------------------------------------------
isSeasonal(time_series)
## [1] TRUE
# Split the Data-----------------------------------------------------------------------------------
day26 <- ymd_hms("2020-02-26 00:00:00", tz = "UTC")
train_data <- data %>% filter(ts < day26)
toforc_data <- data %>% filter(ts >= day26)
# Plot of The Data --------------------------------------------------------------------------------
plot(train_data, type = 'l')
plot(toforc_data, type = 'l')
train_data$group <- "Train"
toforc_data$group <- "To Forecast"
all_data <- rbind(train_data, toforc_data)
ggplot(all_data, aes(x = ts, y = rate, color = group)) +
geom_line() +
labs(x = "TimeStamp", y = "Rate", title = "Combined Plot: Train and To Forecast Data") +
theme_minimal() +
scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
scale_color_manual(values = c("Train" = "blue", "To Forecast" = "black")) +
theme(legend.title = element_blank())
############ Stationary Data Test ##################################################################
# Box-Cox Transformation --------------------------------------------------------------------------
# for check if the data stationary in Varians
trans <- BoxCox.lambda(train_data$rate)
data_transformed1 <- BoxCox(train_data$rate, trans) # Do the transformation
trans2 <- BoxCox.lambda(data_transformed1)
data_transformed2 <- BoxCox(data_transformed1, trans2) # Do the transformation
BoxCox.lambda(data_transformed2)
## [1] 1.000513
# ACF and PACF Plot -------------------------------------------------------------------------------
# for check if the data stationary in Mean
par(mfrow = c(1, 2))
acf(data_transformed2, lag.max = 144, main = "ACF")
pacf(data_transformed2, lag.max = 144, main = "PACF")
# Seasonal Differencing Order 1 -------------------------------------------------------------------
seas_diff1 <- diff(data_transformed2, differences = 1, lag= 144)
adf.test(seas_diff1)
## Warning in adf.test(seas_diff1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: seas_diff1
## Dickey-Fuller = -7.125, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
# Non-Seasonal Differencing Order 1 ---------------------------------------------------------------
nonseas_diff1 <- diff(data_transformed2, differences = 1)
adf.test(nonseas_diff1)
## Warning in adf.test(nonseas_diff1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: nonseas_diff1
## Dickey-Fuller = -9.321, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
# ACF and PACF Plot of Non Seasonal Data ----------------------------------------------------------
par(mfrow = c(1, 2))
acf(nonseas_diff1, lag.max = 144, main = "ACF After Differencing")
pacf(nonseas_diff1, lag.max = 144, main = "PACF After Differencing")
# Find (p, d, q) (P, D, Q) ------------------------------------------------------------------------
# PACF p(AR) = 3 P(SAR) = 1
# Diff d = 1 D = 1
# ACF q(MA) = 2 Q(SMA) = 1
############ Model Estimations ####################################################################
# Generate multiple models with specified orders --------------------------------------------------
models <- list(
model1 = Arima(train_data$rate, order = c(3, 1, 2), seasonal = list(order = c(1, 1, 1)), include.drift = FALSE),
model2 = Arima(train_data$rate, order = c(3, 0, 2), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
model3 = Arima(train_data$rate, order = c(4, 1, 2), seasonal = list(order = c(1, 1, 2)), include.drift = FALSE),
model4 = Arima(train_data$rate, order = c(3, 1, 3), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
model5 = Arima(train_data$rate, order = c(3, 0, 3), seasonal = list(order = c(1, 0, 1)), include.drift = FALSE),
model6 = Arima(train_data$rate, order = c(2, 1, 2), seasonal = list(order = c(2, 1, 1)), include.drift = FALSE),
model7 = Arima(train_data$rate, order = c(3, 0, 3), seasonal = list(order = c(0, 1, 1)), include.drift = FALSE),
model8 = Arima(train_data$rate, order = c(3, 1, 4), seasonal = list(order = c(2, 1, 1)), include.drift = FALSE),
model9 = Arima(train_data$rate, order = c(5, 0, 3), seasonal = list(order = c(1, 1, 1)), include.drift = FALSE),
model10 = Arima(train_data$rate, order = c(4, 1, 4), seasonal = list(order = c(2, 1, 2)), include.drift = FALSE)
)
# Evaluate models ----------------------------------------------------------------------------------
model_comparison <- data.frame(
Model = names(models),
AIC = sapply(models, AIC)
)
# Print Model Comparison ----------------------------------------------------------------------------
print(model_comparison)
## Model AIC
## model1 model1 26427.33
## model2 model2 26348.09
## model3 model3 26431.44
## model4 model4 26427.26
## model5 model5 26211.69
## model6 model6 26427.57
## model7 model7 26349.75
## model8 model8 26433.20
## model9 model9 26362.45
## model10 model10 26433.07
# Select the Best Model -----------------------------------------------------------------------------
best_model_name <- model_comparison$Model[which.min(model_comparison$AIC)]
best_model <- models[[best_model_name]]
cat("Best Model:", best_model_name, "\n")
## Best Model: model5
############ Forecasting ############################################################################
# Forecast Using the Best Model ---------------------------------------------------------------------
forecast_sarima <- forecast(best_model, h = 144)
# Forecast Plot -------------------------------------------------------------------------------------
df <- data.frame(
ts = seq(from = max(train_data$ts), by = "10 min", length.out = 144),
rate = forecast_sarima$mean,
group = "Forecast"
)
train_data$group <- "Train"
toforc_data$group <- "To Forecast"
forecast_plot <- bind_rows(train_data, toforc_data, df)
ggplot(forecast_plot, aes(x = ts, y = rate, color = group)) +
geom_line() +
labs(x = "Date", y = "Rate", title = "Time Series Plot of Rate with Forecast SARIMA") +
theme_minimal() +
scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
scale_color_manual(values = c("Train" = "blue", "To Forecast" = "black", "Forecast" = "red"))
# Ljung-Box Test for residuals-- ------------------------------------------------------------------
# White Noise Check
Box.test(residuals(forecast_sarima), lag = 144, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(forecast_sarima)
## X-squared = 272.26, df = 144, p-value = 5.938e-10
# Shapiro-Wilk Test for residual normality --------------------------------------------------------
shapiro.test(residuals(forecast_sarima))
##
## Shapiro-Wilk normality test
##
## data: residuals(forecast_sarima)
## W = 0.98468, p-value < 2.2e-16
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.