Import Library
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(portes)
## Warning: package 'portes' was built under R version 4.5.3
library(car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(ggplot2)
Membaca Data
data <- read_csv("~/Belajar kuliah lukas/Topik Statistik/PROJEK UAS TDS I/BBCA.csv")
## Rows: 5670 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): open, low, high, close, volume
## date (1): timestamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 6
## timestamp open low high close volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2001-04-16 175 175 180 177 0
## 2 2001-04-17 175 175 180 177 0
## 3 2001-04-18 175 175 180 177 0
## 4 2001-04-19 175 175 180 177 0
## 5 2001-04-20 175 175 180 177 0
## 6 2001-04-23 175 175 180 177 0
tail(data)
## # A tibble: 6 × 6
## timestamp open low high close volume
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2022-12-30 8575 8550 8650 8550 44681400
## 2 2023-01-02 8575 8500 8600 8550 10653900
## 3 2023-01-03 8550 8525 8600 8550 27399100
## 4 2023-01-04 8525 8350 8575 8350 90918800
## 5 2023-01-05 8350 8150 8375 8250 128838500
## 6 2023-01-06 8100 8100 8325 8300 69286600
length(data$timestamp)
## [1] 5670
Forming Data Time Series
data.ts <- ts(data$close, frequency=5)
Plot
Menggunakan function ts.plot
ts.plot(data.ts, type="l", ylab="Harga Penutupan Saham", col="blue")
title(main = "Plot Time Series Harga Penutupan Saham BBCA", cex.sub = 0.8)
points(data.ts, pch = 20, col = "blue")
Menggunakan function ggplot
ggplot(data, aes(x = timestamp, y = close)) +
geom_line(color = "blue") +
labs(title = "Harga Penutupan Saham BBCA",
x = "Tanggal",
y = "Close Price")
## Visualisasi Per Musim
seasonplot(data.ts,5,main="Seasonal Plot of close", ylab="times",year.labels = TRUE,
col=rainbow(20))
Visualisasi Deskriptif
monthplot(data.ts, ylab="Harga Penutupan Saham", col="blue")
data$hari <- weekdays(as.Date(data$timestamp))
data$hari <- factor(data$hari,
levels = c("Monday","Tuesday","Wednesday",
"Thursday","Friday"))
boxplot(close ~ hari,
data = data,
ylab = "Harga Penutupan Saham",
xlab = "Hari Perdagangan",
col = "blue",
main = "Boxplot Harga Saham per Hari")
Identifikasi Model
fligner.test(close ~ hari, data=data)
##
## Fligner-Killeen test of homogeneity of variances
##
## data: close by hari
## Fligner-Killeen:med chi-squared = 2.1993, df = 4, p-value = 0.6992
Splitting Data
train.ts <- data.ts[1:4500]
test.ts <- data.ts[4501:5670]
Visualisasi Data Training
ts.plot(train.ts, ylab="Harga Penutupan Saham", xlab="Date", col="blue")
title(main = "Plot Data Train Harga Penutupan Saham", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")
Visualisasi Data Testing
ts.plot(test.ts, ylab="Harga Penutupan Saham", xlab="Date", col="blue")
title(main = "Plot Data Test Harga Penutupan Saham", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")
Arima Non-Seasonal
Identifikasi Kestasioneran Data
acf0 <- acf(train.ts, main="ACF", lag.max=50, xaxt="n", col="blue")
axis(1, at=0:50/5, labels=0:50)
acf0$lag <- acf0$lag * 5
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%5==0),]
barplot(height = acf0.2$V1,
names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")
Differencing Data
d1 <- diff(train.ts)
ts.plot(d1, type="l", ylab="d1 close", col="blue")
Identifikasi Kestasioneran Data Hasil Differencing
acf1 <- acf(d1,lag.max=50,xaxt="n", main="ACF d1", col="blue")
axis(1, at=0:50/5, labels=0:50)
acf2 <- acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
barplot(height = acf1.2$V1, names.arg=acf1.2$V2, ylab="ACF", xlab="Lag")
Arima Seasonal
Differencing Data Seasonal
D1 <- diff(train.ts,5)
ts.plot(D1, type="l", ylab="D1 Xt", col="blue")
acf2<-acf(D1,lag.max=48,xaxt="n", main="ACF D1", col="blue")
acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
barplot(height = acf2.2$V1, names.arg=acf2.2$V2, ylab="ACF", xlab="Lag")
Differencing
d1D1 <- diff(D1)
ts.plot(d1D1, type="l", ylab="d1 D1 Xt", col="blue")
Identifikasi Model
acf3 <- acf(d1D1,lag.max=10,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:50/5, labels=0:50)
pacf3 <- pacf(d1D1,lag.max=10,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:50/5, labels=0:50)
acf3$lag <- acf3$lag * 5
acf3.1 <- as.data.frame(cbind(acf3$acf,acf3$lag))
acf3.2 <- acf3.1[which(acf3.1$V2%%5==0),]
barplot(height = acf3.2$V1, names.arg=acf3.2$V2, ylab="ACF",
xlab="Lag")
pacf3$lag <- pacf3$lag * 5
pacf3.1 <- as.data.frame(cbind(pacf3$acf,pacf3$lag))
pacf3.2 <- pacf3.1[which(pacf3.1$V2%%5==0),]
barplot(height = pacf3.2$V1, names.arg=pacf3.2$V2, ylab="PACF", xlab="Lag")
Seasonal ARIMA diperoleh \[ARIMA(0,1,1)_5\]
eacf(d1D1)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o x o x o o x o o x o o o
## 1 x o x o x x o x x o x o o o
## 2 x x o o x x x x o o x o o o
## 3 x x x o x x x x x o o o o o
## 4 x x o o x x x x x o o o o o
## 5 x o o o x x x x x x o o o o
## 6 x x o o x x o x x x o o o o
## 7 x x o o x x o x x x x x o o
Karena ARIMA Non-Seasonal dan ARIMA Seasonal sudah stasioner, diperoleh komponen non-seasonal adalah \[ARIMA(0,1,1)\], dan komponen seasonal adalah \[ARIMA(0,1,1)_5\]
tmodel1 <- Arima(train.ts,order=c(0,1,1))
summary(tmodel1)
## Series: train.ts
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.0772
## s.e. 0.0157
##
## sigma^2 = 709.2: log likelihood = -21149.24
## AIC=42302.48 AICc=42302.48 BIC=42315.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.065422 26.62432 14.66592 0.06178061 1.070776 1.009362
## ACF1
## Training set 0.002147916
tmodel2 <- Arima(train.ts, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=5))
summary(tmodel2)
## Series: train.ts
## ARIMA(0,1,1)(0,1,1)[5]
##
## Coefficients:
## ma1 sma1
## -0.0792 -0.9992
## s.e. 0.0158 0.0041
##
## sigma^2 = 708.9: log likelihood = -21139.79
## AIC=42285.58 AICc=42285.58 BIC=42304.81
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6554206 26.60136 14.67277 0.03317414 1.072343 1.009833
## ACF1
## Training set 0.003247358
Model Terbaik
AICKandidatModel <- c(tmodel1$aic, tmodel2$aic)
AICcKandidatModel <- c(tmodel1$aicc, tmodel2$aicc)
BICKandidatModel <- c(tmodel1$bic, tmodel2$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)", "ARIMA(0,1,1)(0,1,1)5")
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA
## Kandidat Model Nilai AIC Nilai AICc Nilai BIC
## 1 ARIMA(0,1,1) 42302.4765771042 42302.4792461433 42315.2997979618
## 2 ARIMA(0,1,1)(0,1,1)5 42285.5786948285 42285.58404004 42304.8101901867
Dibandingkan dengan function auto.arima
auto.arima(train.ts)
## Series: train.ts
## ARIMA(5,2,0)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## -0.8682 -0.6941 -0.5366 -0.3898 -0.1675
## s.e. 0.0147 0.0187 0.0198 0.0187 0.0148
##
## sigma^2 = 856.1: log likelihood = -21566.63
## AIC=43145.26 AICc=43145.28 BIC=43183.73
Pengujian Parameter Model
printstatarima <- function (x, digits = 4,se=TRUE){
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && nrow(x$var.coef)) {
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
coef <- rbind(coef, s.e. = ses)
statt <- coef[1,]/ses
pval <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
coef <- t(coef)
}
print.default(coef, print.gap = 2)
}
}
printstatarima(tmodel2)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.0792 0.0158 -5.0127 0
## sma1 -0.9992 0.0041 -243.7073 0
Diagnostic Model
tsdisplay(residuals(tmodel2), lag.max=45, main='ARIMA(0,1,1)(0,1,1)5 Model Residuals', col="blue")
Box.test(residuals(tmodel2),
lag = 5,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(tmodel2)
## X-squared = 50.886, df = 5, p-value = 9.125e-10
Box.test(residuals(tmodel2),
lag = 10,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(tmodel2)
## X-squared = 67.944, df = 10, p-value = 1.104e-10
Box.test(residuals(tmodel2),
lag = 15,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(tmodel2)
## X-squared = 75.899, df = 15, p-value = 3.893e-10
jarque.bera.test(residuals(tmodel2))
##
## Jarque Bera Test
##
## data: residuals(tmodel2)
## X-squared = 17990, df = 2, p-value < 2.2e-16
Forecasting
ramalan_arima2=forecast(tmodel2, 6)
ramalan_arima2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4501 4601.714 4567.584 4635.844 4549.517 4653.911
## 4502 4602.893 4556.498 4649.288 4531.938 4673.848
## 4503 4604.984 4548.947 4661.022 4519.283 4690.686
## 4504 4606.537 4542.289 4670.785 4508.278 4704.796
## 4505 4606.377 4534.854 4677.900 4496.992 4715.762
## 4506 4606.930 4528.790 4685.071 4487.425 4726.436
prediksi <- forecast(tmodel2, h = length(test.ts))
accuracy(prediksi, test.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6554206 26.60136 14.67277 0.03317414 1.072343 1.009833
## Test set 1258.2275424 1483.04712 1270.09631 18.00265241 18.256169 87.412625
## ACF1
## Training set 0.003247358
## Test set NA
plot(ramalan_arima2, col="blue")
forecast_arima2 <- cbind(ramalan_arima2$mean,ramalan_arima2$lower,ramalan_arima2$upper)
forecast_arima2
## Time Series:
## Start = 4501
## End = 4506
## Frequency = 1
## ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## 4501 4601.714 4567.584 4549.517
## 4502 4602.893 4556.498 4531.938
## 4503 4604.984 4548.947 4519.283
## 4504 4606.537 4542.289 4508.278
## 4505 4606.377 4534.854 4496.992
## 4506 4606.930 4528.790 4487.425
## ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## 4501 4635.844 4653.911
## 4502 4649.288 4673.848
## 4503 4661.022 4690.686
## 4504 4670.785 4704.796
## 4505 4677.900 4715.762
## 4506 4685.071 4726.436