Projek-TDS-I

Lukas Alfredo Setiandra Akar

2026-05-19

Import Library

library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(portes)
## Warning: package 'portes' was built under R version 4.5.3
library(car)
## Loading required package: carData
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(ggplot2)

Membaca Data

data <- read_csv("~/Belajar kuliah lukas/Topik Statistik/PROJEK UAS TDS I/BBCA.csv")
## Rows: 5670 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (5): open, low, high, close, volume
## date (1): timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 6
##   timestamp   open   low  high close volume
##   <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 2001-04-16   175   175   180   177      0
## 2 2001-04-17   175   175   180   177      0
## 3 2001-04-18   175   175   180   177      0
## 4 2001-04-19   175   175   180   177      0
## 5 2001-04-20   175   175   180   177      0
## 6 2001-04-23   175   175   180   177      0
tail(data)
## # A tibble: 6 × 6
##   timestamp   open   low  high close    volume
##   <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>
## 1 2022-12-30  8575  8550  8650  8550  44681400
## 2 2023-01-02  8575  8500  8600  8550  10653900
## 3 2023-01-03  8550  8525  8600  8550  27399100
## 4 2023-01-04  8525  8350  8575  8350  90918800
## 5 2023-01-05  8350  8150  8375  8250 128838500
## 6 2023-01-06  8100  8100  8325  8300  69286600
length(data$timestamp)
## [1] 5670

Forming Data Time Series

data.ts <- ts(data$close, frequency=5)

Plot

Menggunakan function ts.plot

ts.plot(data.ts, type="l", ylab="Harga Penutupan Saham", col="blue")
title(main = "Plot Time Series Harga Penutupan Saham BBCA", cex.sub = 0.8)
points(data.ts, pch = 20, col = "blue")

Menggunakan function ggplot

ggplot(data, aes(x = timestamp, y = close)) +
  geom_line(color = "blue") +
  labs(title = "Harga Penutupan Saham BBCA",
       x = "Tanggal",
       y = "Close Price")

## Visualisasi Per Musim

seasonplot(data.ts,5,main="Seasonal Plot of close", ylab="times",year.labels = TRUE, 
col=rainbow(20))

Visualisasi Deskriptif

monthplot(data.ts, ylab="Harga Penutupan Saham", col="blue")

data$hari <- weekdays(as.Date(data$timestamp))

data$hari <- factor(data$hari,
                    levels = c("Monday","Tuesday","Wednesday",
                               "Thursday","Friday"))

boxplot(close ~ hari,
        data = data,
        ylab = "Harga Penutupan Saham",
        xlab = "Hari Perdagangan",
        col = "blue",
        main = "Boxplot Harga Saham per Hari")

Identifikasi Model

fligner.test(close ~ hari, data=data)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  close by hari
## Fligner-Killeen:med chi-squared = 2.1993, df = 4, p-value = 0.6992

Splitting Data

train.ts <- data.ts[1:4500]
test.ts  <- data.ts[4501:5670]

Visualisasi Data Training

ts.plot(train.ts, ylab="Harga Penutupan Saham", xlab="Date", col="blue")
title(main = "Plot Data Train Harga Penutupan Saham", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")

Visualisasi Data Testing

ts.plot(test.ts, ylab="Harga Penutupan Saham", xlab="Date", col="blue")
title(main = "Plot Data Test Harga Penutupan Saham", cex.sub = 0.8)
points(train.ts, pch = 20, col = "blue")

Arima Non-Seasonal

Identifikasi Kestasioneran Data

acf0 <- acf(train.ts, main="ACF", lag.max=50, xaxt="n", col="blue")
axis(1, at=0:50/5, labels=0:50)

acf0$lag <- acf0$lag * 5
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%5==0),]
barplot(height = acf0.2$V1, 
names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")

Differencing Data

d1 <- diff(train.ts)
ts.plot(d1, type="l", ylab="d1 close", col="blue")

Identifikasi Kestasioneran Data Hasil Differencing

acf1 <- acf(d1,lag.max=50,xaxt="n", main="ACF d1", col="blue")
axis(1, at=0:50/5, labels=0:50)

acf2 <- acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
barplot(height = acf1.2$V1, names.arg=acf1.2$V2, ylab="ACF", xlab="Lag")

Arima Seasonal

Differencing Data Seasonal

D1 <- diff(train.ts,5)
ts.plot(D1, type="l", ylab="D1 Xt", col="blue")

acf2<-acf(D1,lag.max=48,xaxt="n", main="ACF D1", col="blue")

acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
barplot(height = acf2.2$V1, names.arg=acf2.2$V2, ylab="ACF", xlab="Lag")

Differencing

d1D1 <- diff(D1)
ts.plot(d1D1, type="l", ylab="d1 D1 Xt", col="blue")

Identifikasi Model

acf3 <- acf(d1D1,lag.max=10,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:50/5, labels=0:50)

pacf3 <- pacf(d1D1,lag.max=10,xaxt="n", main="ACF d1D1", col="blue")
axis(1, at=0:50/5, labels=0:50)

acf3$lag <- acf3$lag * 5
acf3.1 <- as.data.frame(cbind(acf3$acf,acf3$lag))
acf3.2 <- acf3.1[which(acf3.1$V2%%5==0),]
barplot(height = acf3.2$V1, names.arg=acf3.2$V2, ylab="ACF", 
xlab="Lag")

pacf3$lag <- pacf3$lag * 5
pacf3.1 <- as.data.frame(cbind(pacf3$acf,pacf3$lag))
pacf3.2 <- pacf3.1[which(pacf3.1$V2%%5==0),]
barplot(height = pacf3.2$V1, names.arg=pacf3.2$V2, ylab="PACF", xlab="Lag")

Seasonal ARIMA diperoleh \[ARIMA(0,1,1)_5\]

eacf(d1D1)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o x o x o o x o o x  o  o  o 
## 1 x o x o x x o x x o x  o  o  o 
## 2 x x o o x x x x o o x  o  o  o 
## 3 x x x o x x x x x o o  o  o  o 
## 4 x x o o x x x x x o o  o  o  o 
## 5 x o o o x x x x x x o  o  o  o 
## 6 x x o o x x o x x x o  o  o  o 
## 7 x x o o x x o x x x x  x  o  o

Karena ARIMA Non-Seasonal dan ARIMA Seasonal sudah stasioner, diperoleh komponen non-seasonal adalah \[ARIMA(0,1,1)\], dan komponen seasonal adalah \[ARIMA(0,1,1)_5\]

tmodel1 <- Arima(train.ts,order=c(0,1,1))
summary(tmodel1)
## Series: train.ts 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.0772
## s.e.   0.0157
## 
## sigma^2 = 709.2:  log likelihood = -21149.24
## AIC=42302.48   AICc=42302.48   BIC=42315.3
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE     MASE
## Training set 1.065422 26.62432 14.66592 0.06178061 1.070776 1.009362
##                     ACF1
## Training set 0.002147916
tmodel2 <- Arima(train.ts, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=5))

summary(tmodel2)
## Series: train.ts 
## ARIMA(0,1,1)(0,1,1)[5] 
## 
## Coefficients:
##           ma1     sma1
##       -0.0792  -0.9992
## s.e.   0.0158   0.0041
## 
## sigma^2 = 708.9:  log likelihood = -21139.79
## AIC=42285.58   AICc=42285.58   BIC=42304.81
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE     MASE
## Training set 0.6554206 26.60136 14.67277 0.03317414 1.072343 1.009833
##                     ACF1
## Training set 0.003247358

Model Terbaik

AICKandidatModel <- c(tmodel1$aic, tmodel2$aic)
AICcKandidatModel <- c(tmodel1$aicc, tmodel2$aicc)
BICKandidatModel <- c(tmodel1$bic, tmodel2$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)", "ARIMA(0,1,1)(0,1,1)5")
compmodelARIMA <- cbind(KandidatModelARIMA, AICKandidatModel, AICcKandidatModel, BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model", "Nilai AIC", "Nilai AICc", "Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA
##         Kandidat Model        Nilai AIC       Nilai AICc        Nilai BIC
## 1         ARIMA(0,1,1) 42302.4765771042 42302.4792461433 42315.2997979618
## 2 ARIMA(0,1,1)(0,1,1)5 42285.5786948285   42285.58404004 42304.8101901867

Dibandingkan dengan function auto.arima

auto.arima(train.ts)
## Series: train.ts 
## ARIMA(5,2,0) 
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5
##       -0.8682  -0.6941  -0.5366  -0.3898  -0.1675
## s.e.   0.0147   0.0187   0.0198   0.0187   0.0148
## 
## sigma^2 = 856.1:  log likelihood = -21566.63
## AIC=43145.26   AICc=43145.28   BIC=43183.73

Pengujian Parameter Model

printstatarima <- function (x, digits = 4,se=TRUE){
       if (length(x$coef) > 0) {
         cat("\nCoefficients:\n")
         coef <- round(x$coef, digits = digits)
         if (se && nrow(x$var.coef)) {
           ses <- rep(0, length(coef))
           ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
           coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
           coef <- rbind(coef, s.e. = ses)
           statt <- coef[1,]/ses
           pval  <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
           coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
           coef <- t(coef)
         }
         print.default(coef, print.gap = 2)
       }
}
printstatarima(tmodel2)
## 
## Coefficients:
##                  s.e.          t  sign.
## ma1   -0.0792  0.0158    -5.0127      0
## sma1  -0.9992  0.0041  -243.7073      0

Diagnostic Model

tsdisplay(residuals(tmodel2), lag.max=45, main='ARIMA(0,1,1)(0,1,1)5 Model Residuals', col="blue")

Box.test(residuals(tmodel2),
         lag = 5,
         type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(tmodel2)
## X-squared = 50.886, df = 5, p-value = 9.125e-10
Box.test(residuals(tmodel2),
         lag = 10,
         type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(tmodel2)
## X-squared = 67.944, df = 10, p-value = 1.104e-10
Box.test(residuals(tmodel2),
         lag = 15,
         type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(tmodel2)
## X-squared = 75.899, df = 15, p-value = 3.893e-10
jarque.bera.test(residuals(tmodel2))
## 
##  Jarque Bera Test
## 
## data:  residuals(tmodel2)
## X-squared = 17990, df = 2, p-value < 2.2e-16

Forecasting

ramalan_arima2=forecast(tmodel2, 6)
ramalan_arima2
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 4501       4601.714 4567.584 4635.844 4549.517 4653.911
## 4502       4602.893 4556.498 4649.288 4531.938 4673.848
## 4503       4604.984 4548.947 4661.022 4519.283 4690.686
## 4504       4606.537 4542.289 4670.785 4508.278 4704.796
## 4505       4606.377 4534.854 4677.900 4496.992 4715.762
## 4506       4606.930 4528.790 4685.071 4487.425 4726.436
prediksi <- forecast(tmodel2, h = length(test.ts))

accuracy(prediksi, test.ts)
##                        ME       RMSE        MAE         MPE      MAPE      MASE
## Training set    0.6554206   26.60136   14.67277  0.03317414  1.072343  1.009833
## Test set     1258.2275424 1483.04712 1270.09631 18.00265241 18.256169 87.412625
##                     ACF1
## Training set 0.003247358
## Test set              NA
plot(ramalan_arima2, col="blue")

forecast_arima2 <- cbind(ramalan_arima2$mean,ramalan_arima2$lower,ramalan_arima2$upper)
forecast_arima2
## Time Series:
## Start = 4501 
## End = 4506 
## Frequency = 1 
##      ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## 4501            4601.714                 4567.584                 4549.517
## 4502            4602.893                 4556.498                 4531.938
## 4503            4604.984                 4548.947                 4519.283
## 4504            4606.537                 4542.289                 4508.278
## 4505            4606.377                 4534.854                 4496.992
## 4506            4606.930                 4528.790                 4487.425
##      ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## 4501                 4635.844                 4653.911
## 4502                 4649.288                 4673.848
## 4503                 4661.022                 4690.686
## 4504                 4670.785                 4704.796
## 4505                 4677.900                 4715.762
## 4506                 4685.071                 4726.436