library(googlesheets4)
library(tseries)
library(forecast)
library(TTR)
library(TSA)
library(imputeTS)
library(ggplot2)
library(lmtest)
library(ggpubr)
Data yang digunakan adalah data Indeks Harga Saham Gabungan 2 tahun terakhir yang bersumber dari yahoo finance. Data tersebut telah diunduh dan dirangkum pada google sheets ini..
Data yang digunakan hanyalah data Close, didapatkan
dengan syntax berikut:
library(googlesheets4)
gs4_deauth()
daily <- read_sheet("https://docs.google.com/spreadsheets/d/1qLvTOVejNRXFKpu57O_vxJ55cc3qFc_aKDAS_xzU2Go/edit?usp=sharing")
daily <- daily[c("Date","Close")]
daily
## # A tibble: 365 × 2
## Date Close
## <dttm> <dbl>
## 1 2021-12-01 00:00:00 6508.
## 2 2021-12-02 00:00:00 6584.
## 3 2021-12-03 00:00:00 6539.
## 4 2021-12-04 00:00:00 NA
## 5 2021-12-05 00:00:00 NA
## 6 2021-12-06 00:00:00 6547.
## 7 2021-12-07 00:00:00 6603.
## 8 2021-12-08 00:00:00 6604.
## 9 2021-12-09 00:00:00 6644.
## 10 2021-12-10 00:00:00 6653.
## # … with 355 more rows
# Cek apakah ada missing value:
daily[which(is.na(daily$Close)),]
## # A tibble: 119 × 2
## Date Close
## <dttm> <dbl>
## 1 2021-12-04 00:00:00 NA
## 2 2021-12-05 00:00:00 NA
## 3 2021-12-11 00:00:00 NA
## 4 2021-12-12 00:00:00 NA
## 5 2021-12-18 00:00:00 NA
## 6 2021-12-19 00:00:00 NA
## 7 2021-12-25 00:00:00 NA
## 8 2021-12-26 00:00:00 NA
## 9 2021-12-31 00:00:00 NA
## 10 2022-01-01 00:00:00 NA
## # … with 109 more rows
Dari output di atas, dapat terlihat bahwa terdapat missing value pada baris ke-87 atau pada tanggal 2 mei 2022. Hal ini melanggar salah satu syarat data time series, yaitu rentang waktu yang tidak sama. Oleh karena itu, akan dilakukan penanganan missing value.
library(ggplot2)
ggplot_na_distribution(daily$Close)
Pendugaan dilakukan dengan teknik interpolasi “spline”. Nilai dugaan
dari teknik ini didapatkan dengan memperhatikan pola nilai sebelum dan
setelah nilai missing value. Hal ini lebih baik daripada metode
lain yang hanya mengambil nilai rataan 2 nilai sebelum dan setelah, atau
bahkan hanya menduplikasi nilai sebelum/setelahnya. Oleh karena itu,
teknik ini dianggap paling akurat untuk menduga nilai missing
value. Fungsi yang digunakan untuk mengatasi missing value
adalah fungsi na_interpolation yang tersedia pada package
imputeTS, yang syntax-nya adalah sebagai berikut:
daily$Date <- as.Date(daily$Date)
daily_spline <- na_interpolation(daily, option = "spline")
daily_linear <- na_interpolation(daily, option = "linear")
daily_stine <- na_interpolation(daily, option = "stine")
Setelah dilakukan penanganan, dapat terlihat bahwa nilai IHSG pada tanggal 2 mei 2022 telah terisi dengan nilai \(6840.217\).
Eksplorasi data diperlukan untuk melihat pola dari data time series. Selain itu, eksplorasi juga berguna dalam pembagian data training dan data testing. Data testing yang baik adalah data testing yang memiliki pola seperti data trainingnya. Maka dari itu, diperlukan eksplorasi data untuk menentukan proporsi terbaik data training vs data testing.
asl <- ggplot(daily, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG Harian",
subtitle = "(November 2021 sd November 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
#linear
lin <- ggplot(daily_linear, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG Harian Linear",
subtitle = "(November 2021 sd November 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
spl <- ggplot(daily_spline, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG Harian Spline",
subtitle = "(November 2021 sd November 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
sti <- ggplot(daily_stine, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG Harian Stine",
subtitle = "(November 2021 sd November 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
ggarrange(asl, lin, spl, sti,
ncol = 2, nrow = 2)
Dari plot di atas, terlihat bahwa data cenderung memiliki pola trend yang naik. Terlihat pula bahwa pembagian data training-testing dapat dilakukan antara bulan april 2022 hingga juli 2022. Hal ini karena pola setelah bulan-bulan tersebut cenderung memiliki pola trend naik pula, sama seperti data trainingnya. Untuk lebih jelasnya, perhatikan penjelasan di bawah ini:
p = 0.67 #setelah dilakukan pencarian nilai proporsi data training terbaik
freq_train=as.integer(p*nrow(daily))
ggplot(daily_linear, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG mingguan",
subtitle = "(November 2021 sd November 2022)") +
geom_vline(aes(xintercept = Date[freq_train],
col="Frequency_Train_Data"), lty=2, lwd=.7) +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5),
legend.position = "bottom") +
scale_color_manual(name = "", values = c(Frequency_Train_Data="red"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
ggplot(daily_spline, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG mingguan",
subtitle = "(November 2021 sd November 2022)") +
geom_vline(aes(xintercept = Date[freq_train],
col="Frequency_Train_Data"), lty=2, lwd=.7) +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5),
legend.position = "bottom") +
scale_color_manual(name = "", values = c(Frequency_Train_Data="red"))
ggplot(daily_stine, aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data IHSG mingguan",
subtitle = "(November 2021 sd November 2022)") +
geom_vline(aes(xintercept = Date[freq_train],
col="Frequency_Train_Data"), lty=2, lwd=.7) +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5),
legend.position = "bottom") +
scale_color_manual(name = "", values = c(Frequency_Train_Data="red"))
daily_linear[freq_train,1]
## # A tibble: 1 × 1
## Date
## <date>
## 1 2022-08-01
train <- 1:freq_train
y <- daily_linear$Close
y_train <- ts(y[train])
y_test <- ts(y[-train], start = 88)
ggplot(daily_linear[train,], aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "3 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data Training",
subtitle = "(September 2020 sd Mei 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
ggplot(daily_linear[-train,], aes(x=Date, y=Close)) +
geom_line() +
scale_x_date(date_breaks = "1 month", date_labels = "%Y %b") +
labs(title = "Plot Time Series Data Testing",
subtitle = "(Mei 2022 sd September 2022)",
y="Harga IHSG") +
theme(plot.title = element_text(face = "bold", hjust=.5),
plot.subtitle = element_text(hjust=.5))
# Cek Kestasioneran Data ## Prosedur Eksploratif
plot.ts(y_train, ylab=expression(Y[t]), main = "Plot Time Series Data IHSG mingguan")
acf(y_train)
## Uji Formal
adf.test(y_train)
##
## Augmented Dickey-Fuller Test
##
## data: y_train
## Dickey-Fuller = -2.2698, Lag order = 6, p-value = 0.4623
## alternative hypothesis: stationary
y_diff <- diff(y_train, differences = 1)
y_diff
## Time Series:
## Start = 2
## End = 244
## Frequency = 1
## [1] 76.14306600 -45.31396500 2.87011733 2.87011733 2.87011733
## [6] 55.45263700 1.22900400 40.13427700 8.98974600 3.31640633
## [11] 3.31640633 3.31640633 -47.23388700 10.61962900 -31.45898400
## [16] 7.13427700 -18.27343767 -18.27343767 -18.27343767 7.19726600
## [21] -24.71630900 25.95800800 7.34912100 4.18131533 4.18131533
## [26] 4.18131533 22.89892500 2.33398500 -19.19482400 20.95654275
## [31] 20.95654275 20.95654275 20.95654275 30.06494200 -33.07421900
## [36] -8.94775400 47.96484400 -3.39729833 -3.39729833 -3.39729833
## [41] -43.15380800 -0.90527400 11.29101600 35.04492200 -16.11767567
## [46] -16.11767567 -16.11767567 -30.98877000 -22.07812500 34.89013700
## [51] 99.50195300 -23.73567700 -23.73567700 -23.73567700 -86.99316400
## [56] 32.64599600 10.34228500 34.35009700 -4.78678367 -4.78678367
## [61] -4.78678367 38.25048800 38.25048800 -23.80078100 47.54003900
## [66] 24.51529967 24.51529967 24.51529967 -15.41503900 45.08398400
## [71] -10.96386700 -8.03515600 -27.04036467 -27.04036467 -27.04036467
## [76] 73.01123000 42.70117200 -15.08105400 57.70068300 3.38232433
## [81] 3.38232433 3.38232433 -40.97070300 58.06201100 -102.23632800
## [86] 70.35107400 8.31750500 8.31750500 8.31750500 8.31750500
## [91] -53.03906300 29.96313500 29.96313500 -19.75406900 -19.75406900
## [96] -19.75406900 -54.88281300 50.25781300 59.56689500 -1.40576200
## [101] 9.86735033 9.86735033 9.86735033 -34.01904300 74.20996100
## [106] -28.01025400 -9.41992200 0.07210267 0.07210267 0.07210267
## [111] 45.64062500 -4.70654300 53.56982500 -47.15283200 15.69026667
## [116] 15.69026667 15.69026667 -37.91406200 41.50097600 18.25195400
## [121] 7.31787100 12.48600233 12.48600233 12.48600233 32.08105500
## [126] -44.08300800 23.15136800 83.46777300 -2.34700533 -2.34700533
## [131] -2.34700533 10.98681700 47.99609300 -27.24462800 9.93920900
## [136] 9.93920900 9.93920900 9.93920900 -76.05712900 28.12988200
## [141] 48.83105500 -50.58691400 -3.20898433 -3.20898433 -3.20898433
## [146] 16.17382800 -35.38867200 32.14990300 -29.01482600 -29.01482600
## [151] -29.01482600 -29.01482600 -29.01482600 -29.01482600 -29.01482600
## [156] -29.01482600 -29.01482600 -29.01482600 -29.01482600 -89.95703200
## [161] -3.59082000 -216.36328100 -1.84668000 11.61840825 11.61840825
## [166] 11.61840825 11.61840825 148.94726600 29.92089800 94.80908200
## [171] -25.78971367 -25.78971367 -25.78971367 73.36621100 -30.63720700
## [176] 71.37597650 71.37597650 3.76969400 3.76969400 3.76969400
## [181] 111.40527400 -0.12353500 -0.12353500 34.23779300 -28.79296900
## [186] -28.79296900 -28.79296900 44.46289100 52.26904300 -10.48388700
## [191] -96.18212900 -30.40201800 -30.40201800 -30.40201800 54.43994100
## [196] -42.83203100 43.27636700 -113.35937500 13.13671867 13.13671867
## [201] 13.13671867 67.69384800 -59.75976600 13.95605500 44.66992200
## [206] -8.96061200 -8.96061200 -8.96061200 -19.59912100 -54.10498100
## [211] -30.76904300 -117.25390600 -51.71875000 -51.71875000 -51.71875000
## [216] 64.09423800 -56.85595700 6.17675800 87.63232400 -6.02473933
## [221] -6.02473933 -6.02473933 -3.85107500 -77.30371100 49.09668000
## [226] -38.18212900 2.44938167 2.44938167 2.44938167 76.83886700
## [231] 138.65136700 -10.61230500 22.83105500 -9.51822900 -9.51822900
## [236] -9.51822900 13.12988200 26.67871100 58.60107500 -5.69384800
## [241] 5.88671867 5.88671867 5.88671867
adf.test(y_diff)
## Warning in adf.test(y_diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: y_diff
## Dickey-Fuller = -5.056, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
acf(y_diff)
ARIMA(0,1,2)
pacf(y_diff)
ARIMA(2,1,0)
eacf(y_diff)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o x o o o o o o o o o o o o
## 1 o x o o o o o o o o o o o o
## 2 x x o o o o o o o o o o o o
## 3 x o x o o o o o o o o o o o
## 4 x o o x o o o o o o o o o o
## 5 x x x o o o o o o o o o o o
## 6 x x x x o o o o o o o o o o
## 7 x x x x o o o o o o o o o o
ARIMA(0,1,2), ARIMA(1,1,2), ARIMA(0,1,3)
arima012 <- Arima(y_train, order=c(0,1,2), method="ML")
coeftest(arima012)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.019243 0.063429 -0.3034 0.76160
## ma2 0.165620 0.065732 2.5196 0.01175 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arima112 <- Arima(y_train, order=c(1,1,2), method="ML")
coeftest(arima112)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.173281 0.344427 0.5031 0.61489
## ma1 -0.186199 0.337848 -0.5511 0.58154
## ma2 0.166383 0.066327 2.5085 0.01212 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arima013 <- Arima(y_train, order=c(0,1,3), method="ML")
coeftest(arima013)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.010284 0.065077 -0.1580 0.87444
## ma2 0.161738 0.066474 2.4331 0.01497 *
## ma3 0.038634 0.065145 0.5931 0.55315
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
arima210 <- Arima(y_train, order=c(2,1,0), method="ML")
coeftest(arima210)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.0060659 0.0636912 -0.0952 0.92412
## ar2 0.1499402 0.0637433 2.3522 0.01866 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1