# Name : Moch. Januar Akbar # NRP : 5003211073 # Class : ADW K
# Import Packages
library(seastests)
library(forecast)
library(ggplot2)
library(lubridate)
library(dplyr)
library(tseries)
## Import Data
data <- read.csv(“C:/Users/HP/Downloads/data (3).csv”)
## Number 1 (Seasonality Check) data\(ts <- as.POSIXct(data\)ts, format = “%Y-%m-%dT%H:%M:%S”, tz = “UTC”) rate_ts <- ts(data$rate, frequency = 144)
seasonalitytest <- isSeasonal(rate_ts) print(seasonalitytest)
## Number 2
# Splitting Data point <- ymd_hms(“2020-02-26 00:00:00”, tz = “UTC”)
start <- as.POSIXct(“2020-01-01 00:00:00”, format = “%Y-%m-%d %H:%M:%S”, tz = “UTC”) end <- as.POSIXct(“2020-12-31 23:59:59”, format = “%Y-%m-%d %H:%M:%S”, tz = “UTC”)
train <- data %>% filter(ts < point) test <- data %>% filter(ts >= point)
print(train) print(test)
## Time Series Plot
train\(set <- "Train" test\)set <- “Test”
plot_data <- bind_rows(train, test)
ggplot(plot_data, aes(x = ts, y = rate, color = set)) + geom_line() + labs(x = “Date”, y = “Rate”, title = “Time Series Plot of Rate”) + theme_minimal() + scale_x_datetime(labels = scales::date_format(“%Y-%m-%d %H:%M”)) + scale_color_manual(values = c(“Train” = “blue”, “Test” = “green”))
## Stationarity Test
# Box Cox Transformation
#1 lambda <- BoxCox.lambda(train\(rate) data_transformed <- BoxCox(train\)rate, lambda) print(lambda) print(data_transformed)
#2 lambda2 <- BoxCox.lambda(data_transformed)
print(lambda2)
# ACF / PACF training #1 par(mfrow = c(1, 2)) ACF1 <- acf(data_transformed, main = “ACF of Transformed Data”, lag.max = 144) PACF1 <- pacf(data_transformed, main = “PACF of Transformed Data”, lag.max = 144)
# Differencing #1 diff1 <- diff(data_transformed, differences = 1) adf_test <- adf.test(diff1) print(adf_test)
#ADF Test adf_test <- adf.test(diff1) adf_test
# ACF and PACF #2 par(mfrow = c(1, 2)) ACF2 <- acf(diff1, main = “ACF of Differenced Data”, lag.max = 144) PACF2 <- pacf(diff1, main = “PACF of Differenced Data”, lag.max = 144)
#Seasonal Differencing seasonal_diff <- diff(diff1, lag = 144) cleaned_data <- na.omit(seasonal_diff)
# ACF and PACF #3 par(mfrow = c(1, 2)) ACF3 <- acf(cleaned_data, main = “ACF of Seasonal Differenced Data”, lag.max = 144) PACF3 <- pacf(cleaned_data, main = “PACF of Seasonal Differenced Data”, lag.max = 144)
#ARIMA #1 best_arima_model <- auto.arima(train$rate)
summary(best_arima_model)
best_arima_model$aic
forecast <- forecast(best_arima_model, h = 150)
dataframe_forecast <- data.frame( ts = seq(from = max(train\(ts), by = "10 min", length.out = 150), rate = forecast\)mean, set = “Forecast” )
train\(set <- "Train" test\)set <- “Test” plot_data <- bind_rows(train, test, dataframe_forecast)
ggplot(plot_data, aes(x = ts, y = rate, color = set)) + geom_line() + labs(x = “Date”, y = “Rate”, title = “Time Series Plot of Rate with Forecast”) + theme_minimal() + scale_x_datetime(labels = scales::date_format(“%Y-%m-%d %H:%M”)) + scale_color_manual(values = c(“Train” = “blue”, “Test” = “green”, “Forecast” = “red”))
## KS testing and normality testing residuals <- residuals(best_arima_model)
ks_test <- ks.test(residuals, “pnorm”, mean(residuals), sd(residuals))
print(ks_test)
ljung_box_test <- Box.test(residuals, lag = 144, type = “Ljung-Box”)
print(ljung_box_test)
#ARIMA
#2
arima_model1 <- arima(train$rate, order = c(7, 1,7 ))
summary(arima_model1)
arima_model1$aic
forecast2 <- forecast(arima_model1, h = 150)
dataframe_forecast2 <- data.frame(
ts = seq(from = max(train$ts), by = "10 min", length.out = 150),
rate = forecast2$mean,
set = "Forecast"
)
train$set <- "Train"
test$set <- "Test"
plot_data <- bind_rows(train, test, dataframe_forecast2)
ggplot(plot_data, aes(x = ts, y = rate, color = set)) +
geom_line() +
labs(x = "Date", y = "Rate", title = "Time Series Plot of Rate with Forecast") +
theme_minimal() +
scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
scale_color_manual(values = c("Train" = "blue", "Test" = "green", "Forecast" = "red"))
## KS testing and normality testing
residuals2 <- residuals(arima_model1)
ks_test2 <- ks.test(residuals2, "pnorm", mean(residuals2), sd(residuals2))
print(ks_test2)
ljung_box_test2 <- Box.test(residuals2, lag = 144, type = "Ljung-Box")
print(ljung_box_test2)
# SARIMA
sarima <- Arima(train$rate, order=c(7,1,7), seasonal=c(1,1,1))
summary(sarima)
forecas_values <- forecast(sarima, h=150)
###########
dataframe_forecast3 <- data.frame(
ts = seq(from = max(train$ts), by = "10 min", length.out = 150),
rate = forecas_values$mean,
set = "Forecast"
)
train$set <- "Train"
test$set <- "Test"
plot_data1 <- bind_rows(train, test, dataframe_forecast3)
ggplot(plot_data1, aes(x = ts, y = rate, color = set)) +
geom_line() +
labs(x = "Date", y = "Rate", title = "Time Series Plot of Rate with Forecast SARIMA") +
theme_minimal() +
scale_x_datetime(labels = scales::date_format("%Y-%m-%d %H:%M")) +
scale_color_manual(values = c("Train" = "blue", "Test" = "green", "Forecast" = "red"))
accuracy(forecas_values)
residuals_sarima <- residuals(sarima)
ks_test3 <- ks.test(residuals_sarima, "pnorm", mean(residuals_sarima), sd(residuals_sarima))
print(ks_test3)
ljung_box_test3 <- Box.test(residuals_sarima, lag = 144, type = "Ljung-Box")
print(ljung_box_test3)
print(forecas_values)
forecast_table <- data.frame(
ts = seq(from = max(train$ts), by = "10 min", length.out = 150),
set = "Forecast"
)
assign("forecast_table", forecast_table)
print(forecast_table)
## RMSE Tested in Forecasting Data Submission from the previous (0,3) to (0,29)
## I am sorry but i am inadequate and lack of knowledge how to get the best model for ARIMA and SARIMA
## especially if there are lot of p,q and P,Q that can be chosen
## also the researcher is lacking the knowledge how to deal with non normality distibution in the residual and didn't pass the ljung box test
## upload to rpubs
library(knitr)
library(rmarkdown)
render("Moch. Januar Akbar_5003211073_SARIMA Asingment.Rmd", output_format = "html_document")