Intro
knitr::include_graphics("ihsg.jpg")
Rmd kali ini akan memprediksi harga penutupan ihsg menggunakan algoritma time series yaitu dengan model arima dengan menggunakan variable variable tambahan seperti harga emas, minyak dan lain lain
Import Library and Dataset
library(tidyverse)
library(tidyquant)
library(priceR)
library(Quandl)
library(zoo)
library(forecast)
library(lubridate)
library(padr)
library(tseries)
library(randomForest)
gold <- Quandl("LBMA/GOLD", collapse = "daily")
currency <- historical_exchange_rates("USD", to = "IDR", start_date = "2020-01-01", end_date = "2021-02-28")
## For full currency exchange rate API documentation visit:
## https://exchangerate.host/#/#docs
## (this message will only appear once per session)
ihsg <- tq_get("^JKSE")
dowjones <- tq_get("^DJI")
oil <- Quandl("CHRIS/CME_CL1", collapse = "daily")
shanghai <- tq_get("000001.SS")
Data Pre Processing
ihsg <- ihsg %>%
select(c(date, open, close)) %>%
mutate(date = ymd(date)) %>%
filter(date >= "2020-01-01",
date <= "2021-02-26")
currency <- currency %>%
rename(USDIDR = one_USD_equivalent_to_x_IDR)
gold <- gold %>%
select(c(Date, `USD (AM)`)) %>%
rename(Goldprices = `USD (AM)`,
date = Date) %>%
mutate(date = ymd(date)) %>%
filter(date >= "2020-01-01",
date <= "2021-02-28")
dowjones <- dowjones %>%
select(date, adjusted) %>%
rename(dowjones = adjusted) %>%
mutate(date = ymd(date)) %>%
filter(date >= "2020-01-01",
date <= "2021-02-28")
oil <- oil %>%
select(Date, Settle) %>%
rename(date = Date,
OilPrices = Settle) %>%
mutate(date = ymd(date)) %>%
filter(date >= "2020-01-01",
date <= "2021-02-28")
shanghai <- shanghai %>%
select(date, adjusted) %>%
rename(shanghai = adjusted) %>%
mutate(date = ymd(date)) %>%
filter(date >= "2020-01-01",
date <= "2021-02-28")
a <- left_join(ihsg, currency, "date")
b <- left_join(a, gold, "date")
c <- left_join(b, dowjones, "date")
d <- left_join(c, shanghai, "date")
dataset <- left_join(d, oil, "date")
Add variable Inflasi and BI Rate
dataset$Inflasi <- month(dataset$date)
dataset[1:22, ]$Inflasi <- 2.68
dataset[23:42, ]$Inflasi <- 2.98
dataset[43:63, ]$Inflasi <- 2.96
dataset[64:84, ]$Inflasi <- 2.67
dataset[85:100, ]$Inflasi <- 2.19
dataset[101:121, ]$Inflasi <- 1.96
dataset[122:143, ]$Inflasi <- 1.54
dataset[144:161, ]$Inflasi <- 1.32
dataset[162:183, ]$Inflasi <- 1.42
dataset[184:202, ]$Inflasi <- 1.44
dataset[203:223, ]$Inflasi <- 1.59
dataset[224:242, ]$Inflasi <- 1.68
dataset[243:262, ]$Inflasi <- 1.55
dataset[263:281, ]$Inflasi <- 1.38
dataset[1:22, "BiRate"] <- 5
dataset[23:42, "BiRate"] <- 4.75
dataset[43:63, "BiRate"] <- 4.50
dataset[64:84, "BiRate"] <- 4.50
dataset[85:100, "BiRate"] <- 4.50
dataset[101:121, "BiRate"] <- 4.25
dataset[122:143, "BiRate"] <- 4
dataset[144:161, "BiRate"] <- 4
dataset[162:183, "BiRate"] <- 4
dataset[184:202, "BiRate"] <- 4
dataset[203:223, "BiRate"] <- 3.75
dataset[224:242, "BiRate"] <- 3.75
dataset[243:262, "BiRate"] <- 3.75
dataset[263:281, "BiRate"] <- 3.50
Dataset Full
rmarkdown::paged_table(dataset)
Check If There is Variable that didnt influence
model1 <- lm(close~USDIDR + Goldprices + dowjones + OilPrices + shanghai+Inflasi+BiRate, data = dataset)
summary(model1)
##
## Call:
## lm(formula = close ~ USDIDR + Goldprices + dowjones + OilPrices +
## shanghai + Inflasi + BiRate, data = dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -375.69 -95.49 9.89 88.33 406.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.546e+03 5.385e+02 -4.727 3.86e-06 ***
## USDIDR -3.765e-03 9.948e-03 -0.378 0.705
## Goldprices -8.230e-01 1.711e-01 -4.809 2.67e-06 ***
## dowjones 1.425e-01 7.228e-03 19.717 < 2e-16 ***
## OilPrices 8.453e+00 1.544e+00 5.475 1.09e-07 ***
## shanghai 1.271e+00 1.024e-01 12.408 < 2e-16 ***
## Inflasi 4.906e+02 4.532e+01 10.825 < 2e-16 ***
## BiRate 3.821e+01 5.170e+01 0.739 0.461
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.1 on 242 degrees of freedom
## (31 observations deleted due to missingness)
## Multiple R-squared: 0.9501, Adjusted R-squared: 0.9486
## F-statistic: 657.9 on 7 and 242 DF, p-value: < 2.2e-16
Convert to Time Series
ts_dataset <- dataset %>%
select(-date) %>%
na.approx() %>%
msts(seasonal.periods = c(7, 7*4))
ts_dataset [ , "close"] %>%
autoplot()

Cross Validation
test <- tail(ts_dataset, 30)
train <- head(ts_dataset, length(ts_dataset) - 30)
dataset dibagi menjadi 2 kedalam data train dan data test. data train untuk melatih model, dan menggunakan data dari 1 januari 2020 sampai akhr februari dikurangi 30 data terakhir. data test menggunakan data 30 hari terakhir
Modeling
modelarima2 <- stlm(train[ , "close"],
method = "arima",
xreg = train[ , c("open", "USDIDR", "Goldprices", "dowjones", "OilPrices", "BiRate", "Inflasi", "shanghai")])
model menggunakan model arima dengan fungsi stlm dan agar hasilnya lebih kuat, disini menggunakan beberapa variable tambahan
Forecasting and Evaluating
forecastarima2 <- forecast(modelarima2, h = 30, xreg = test[ , c("open", "USDIDR", "Goldprices", "dowjones", "OilPrices", "BiRate", "Inflasi", "shanghai")])
accuracy(forecastarima2$mean, as.numeric(test[, "close"]))
## ME RMSE MAE MPE MAPE
## Test set 15.06163 80.28048 66.05978 0.2383274 1.063973
Plot Model arima
z <- as.data.frame(forecastarima2)
y <- as.data.frame(test)
compare <- cbind(forecast = z$`Point Forecast`, real = y$close)
compare <- as.data.frame(compare)
compare$date <- dataset$date %>%
tail(30)
plot <- compare %>%
ggplot(aes(x = date)) +
geom_line(aes(y = forecast), colour = "red", size = 2) +
geom_line(aes(y = real), colour = "green", size = 2) +
labs(x = "Date",
y = NULL,
title = "Comparison actual vs prediction") +
theme_dark()
plot

Red = Forecast
Green = Aktual
Conclusion
model menggunakan model arima dengan prediktor tambahan menjadi model dengan error paling sedikit, dibanding model tbats dan model autoarima tanpa prediktor tambahan, model arima juga berhasil mengalahkan model dengan random forest dalam hal akurasi, namun tetap model ini masih belum sangat akurat karena nilai mapenya masih diatas satu, dan rmse serta mae nya masih cukup tinggi. mungkin perlu prediktor tambahan agar akurasinya semakin meningkat