library(tidyr)
library(readxl)
library(forecast)
library(tseries)
library(knitr)
library(quantmod)
library(magrittr)
library(TTR)
library(TSA)
library(graphics)
library(dplyr)
library(stats)
library(ggplot2)
library(nortest)
sahamXL <- read_excel("DATA CLOSING XL.xlsx")
sahamXL
## # A tibble: 200 × 2
## Date Price
## <dttm> <dbl>
## 1 2024-02-13 00:00:00 2310
## 2 2024-02-15 00:00:00 2310
## 3 2024-02-16 00:00:00 2270
## 4 2024-02-19 00:00:00 2200
## 5 2024-02-20 00:00:00 2210
## 6 2024-02-21 00:00:00 2230
## 7 2024-02-22 00:00:00 2310
## 8 2024-02-23 00:00:00 2340
## 9 2024-02-26 00:00:00 2290
## 10 2024-02-27 00:00:00 2330
## # ℹ 190 more rows
sahamXL$Date  <- as.Date(sahamXL$Date)    # Date is read in as POSIXct; keep only the calendar date
sahamXL$Price <- as.numeric(sahamXL$Price)
sahamXL
## # A tibble: 200 × 2
## Date Price
## <date> <dbl>
## 1 2024-02-13 2310
## 2 2024-02-15 2310
## 3 2024-02-16 2270
## 4 2024-02-19 2200
## 5 2024-02-20 2210
## 6 2024-02-21 2230
## 7 2024-02-22 2310
## 8 2024-02-23 2340
## 9 2024-02-26 2290
## 10 2024-02-27 2330
## # ℹ 190 more rows
summary(sahamXL)
## Date Price
## Min. :2024-02-13 Min. :2080
## 1st Qu.:2024-05-07 1st Qu.:2210
## Median :2024-07-24 Median :2270
## Mean :2024-07-20 Mean :2280
## 3rd Qu.:2024-10-03 3rd Qu.:2320
## Max. :2024-12-13 Max. :2610
str(sahamXL)
## tibble [200 × 2] (S3: tbl_df/tbl/data.frame)
## $ Date : Date[1:200], format: "2024-02-13" "2024-02-15" ...
## $ Price: num [1:200] 2310 2310 2270 2200 2210 2230 2310 2340 2290 2330 ...
datatsXL <- ts(data = sahamXL$Price)
datatsXL
## Time Series:
## Start = 1
## End = 200
## Frequency = 1
## [1] 2310 2310 2270 2200 2210 2230 2310 2340 2290 2330 2330 2380 2560 2490 2540
## [16] 2400 2410 2460 2470 2440 2400 2410 2470 2440 2400 2390 2410 2390 2280 2260
## [31] 2270 2300 2270 2340 2300 2270 2120 2190 2190 2300 2390 2380 2380 2380 2440
## [46] 2470 2420 2480 2410 2430 2480 2610 2600 2580 2380 2440 2460 2470 2470 2460
## [61] 2440 2410 2430 2310 2320 2290 2310 2250 2230 2230 2220 2180 2160 2080 2130
## [76] 2140 2180 2160 2160 2180 2200 2160 2180 2190 2170 2210 2180 2210 2270 2270
## [91] 2270 2280 2240 2240 2240 2240 2180 2200 2250 2210 2190 2180 2170 2170 2170
## [106] 2170 2170 2080 2190 2170 2140 2160 2180 2190 2260 2220 2240 2240 2320 2290
## [121] 2210 2260 2230 2240 2240 2230 2230 2270 2270 2230 2360 2330 2250 2270 2270
## [136] 2290 2320 2290 2280 2300 2280 2300 2290 2310 2350 2310 2290 2260 2200 2260
## [151] 2250 2260 2270 2300 2320 2310 2290 2290 2300 2270 2290 2300 2280 2260 2260
## [166] 2270 2250 2280 2260 2250 2250 2240 2230 2220 2210 2210 2140 2130 2150 2160
## [181] 2140 2150 2180 2170 2210 2180 2190 2180 2180 2220 2230 2240 2230 2250 2310
## [196] 2300 2290 2240 2260 2270
plot(datatsXL, main = "XL Closing Prices")
adf.test(datatsXL)
##
## Augmented Dickey-Fuller Test
##
## data:  datatsXL
## Dickey-Fuller = -2.8721, Lag order = 5, p-value = 0.2109
## alternative hypothesis: stationary
The p-value (0.2109) is greater than alpha = 0.05, so we fail to reject H0: the series is not stationary. A Box-Cox transformation and differencing are therefore applied.
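The decision above can also be read off programmatically; a minimal sketch (adf_p is an illustrative name, not part of the original script):

# Sketch: extract the ADF p-value and apply the alpha = 0.05 decision rule
adf_p <- adf.test(datatsXL)$p.value
if (adf_p > 0.05) "fail to reject H0: non-stationary" else "reject H0: stationary"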
lambda <- BoxCox.lambda(datatsXL)
lambda
## [1] -0.9999242
data_boxcox <- BoxCox(datatsXL, lambda)
data_boxcox
## Time Series:
## Start = 1
## End = 200
## Frequency = 1
## [1] 0.9996426 0.9996426 0.9996349 0.9996209 0.9996230 0.9996270 0.9996426
## [8] 0.9996481 0.9996388 0.9996463 0.9996463 0.9996553 0.9996849 0.9996739
## [15] 0.9996818 0.9996588 0.9996605 0.9996690 0.9996706 0.9996656 0.9996588
## [22] 0.9996605 0.9996706 0.9996656 0.9996588 0.9996571 0.9996605 0.9996571
## [29] 0.9996369 0.9996330 0.9996349 0.9996407 0.9996349 0.9996481 0.9996407
## [36] 0.9996349 0.9996037 0.9996188 0.9996188 0.9996407 0.9996571 0.9996553
## [43] 0.9996553 0.9996553 0.9996656 0.9996706 0.9996623 0.9996723 0.9996605
## [50] 0.9996640 0.9996723 0.9996924 0.9996909 0.9996879 0.9996553 0.9996656
## [57] 0.9996690 0.9996706 0.9996706 0.9996690 0.9996656 0.9996605 0.9996640
## [64] 0.9996426 0.9996444 0.9996388 0.9996426 0.9996310 0.9996270 0.9996270
## [71] 0.9996250 0.9996167 0.9996125 0.9995947 0.9996060 0.9996082 0.9996167
## [78] 0.9996125 0.9996125 0.9996167 0.9996209 0.9996125 0.9996167 0.9996188
## [85] 0.9996146 0.9996230 0.9996167 0.9996230 0.9996349 0.9996349 0.9996349
## [92] 0.9996369 0.9996290 0.9996290 0.9996290 0.9996290 0.9996167 0.9996209
## [99] 0.9996310 0.9996230 0.9996188 0.9996167 0.9996146 0.9996146 0.9996146
## [106] 0.9996146 0.9996146 0.9995947 0.9996188 0.9996146 0.9996082 0.9996125
## [113] 0.9996167 0.9996188 0.9996330 0.9996250 0.9996290 0.9996290 0.9996444
## [120] 0.9996388 0.9996230 0.9996330 0.9996270 0.9996290 0.9996290 0.9996270
## [127] 0.9996270 0.9996349 0.9996349 0.9996270 0.9996517 0.9996463 0.9996310
## [134] 0.9996349 0.9996349 0.9996388 0.9996444 0.9996388 0.9996369 0.9996407
## [141] 0.9996369 0.9996407 0.9996388 0.9996426 0.9996499 0.9996426 0.9996388
## [148] 0.9996330 0.9996209 0.9996330 0.9996310 0.9996330 0.9996349 0.9996407
## [155] 0.9996444 0.9996426 0.9996388 0.9996388 0.9996407 0.9996349 0.9996388
## [162] 0.9996407 0.9996369 0.9996330 0.9996330 0.9996349 0.9996310 0.9996369
## [169] 0.9996330 0.9996310 0.9996310 0.9996290 0.9996270 0.9996250 0.9996230
## [176] 0.9996230 0.9996082 0.9996060 0.9996103 0.9996125 0.9996082 0.9996103
## [183] 0.9996167 0.9996146 0.9996230 0.9996167 0.9996188 0.9996167 0.9996167
## [190] 0.9996250 0.9996270 0.9996290 0.9996270 0.9996310 0.9996426 0.9996407
## [197] 0.9996388 0.9996290 0.9996330 0.9996349
## attr(,"lambda")
## [1] -0.9999242
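As a quick sanity check (a sketch, not part of the original output), the Box-Cox formula for lambda != 0, y(lambda) = (y^lambda - 1) / lambda, can be applied by hand to the first closing price; it should reproduce the first transformed value printed above (about 0.9996426).

# Sketch: reproduce BoxCox() manually for the first observation (lambda != 0)
(sahamXL$Price[1]^lambda - 1) / lambda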
adf.test(data_boxcox)
##
## Augmented Dickey-Fuller Test
##
## data: data_boxcox
## Dickey-Fuller = -2.8559, Lag order = 5, p-value = 0.2176
## alternative hypothesis: stationary
The p-value (0.2176) is still greater than alpha = 0.05, so we again fail to reject H0: the Box-Cox-transformed series is still not stationary, so first-order differencing is applied.
datatsXL.dif1<- diff(data_boxcox)
plot(datatsXL.dif1)
adf.test(datatsXL.dif1)
## Warning in adf.test(datatsXL.dif1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: datatsXL.dif1
## Dickey-Fuller = -5.5681, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
The p-value is smaller than alpha = 0.05, so we reject H0: the differenced series is stationary.
acf(datatsXL.dif1, main = "ACF Data Differencing")
acf(datatsXL.dif1, plot=F)
##
## Autocorrelations of series 'datatsXL.dif1', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## -0.154 -0.014 -0.142 -0.033 0.085 0.086 -0.041 0.038 -0.067 0.062 -0.035
## 12 13 14 15 16 17 18 19 20 21 22
## -0.016 0.074 -0.075 -0.120 -0.061 -0.036 0.021 0.036 -0.078 0.021 -0.116
pacf(datatsXL.dif1, main = "PACF Data Differencing")
pacf(datatsXL.dif1 , plot=F)
##
## Partial autocorrelations of series 'datatsXL.dif1', by lag
##
## 1 2 3 4 5 6 7 8 9 10 11
## -0.154 -0.039 -0.154 -0.085 0.057 0.088 -0.022 0.057 -0.021 0.047 -0.026
## 12 13 14 15 16 17 18 19 20 21 22
## -0.036 0.075 -0.062 -0.159 -0.112 -0.082 -0.078 -0.013 -0.072 0.017 -0.093
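For reference, the approximate 95% significance bound used to judge the ACF/PACF spikes is ±1.96/sqrt(n); a minimal sketch:

# Approximate 95% confidence bound for the sample ACF/PACF of the differenced series
n <- length(datatsXL.dif1)
c(lower = -1.96 / sqrt(n), upper = 1.96 / sqrt(n))

With n = 199 the bound is roughly ±0.139, so lags 1 and 3 fall just outside it in both the ACF and PACF, which motivates the candidate orders below.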
Significant ACF lags: 1 and 3
Significant PACF lags: 1 and 3
Differencing order: d = 1
Candidate ARIMA(p,d,q) combinations:
ARIMA(1,1,1)
ARIMA(1,1,3)
ARIMA(3,1,1)
ARIMA(3,1,3)
model1 <- Arima(datatsXL, order=c(1,1,1), method="ML")
model2 <- Arima(datatsXL, order=c(1,1,3), method="ML")
model3 <- Arima(datatsXL, order=c(3,1,1), method="ML")
model4 <- Arima(datatsXL, order=c(3,1,3), method="ML")
summary(model1)
## Series: datatsXL
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.4430 -0.6123
## s.e. 0.2085 0.1803
##
## sigma^2 = 2024: log likelihood = -1038.85
## AIC=2083.71 AICc=2083.83 BIC=2093.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.280664 44.64701 30.35846 -0.03637538 1.323012 0.9681623
## ACF1
## Training set 0.002812923
lmtest::coeftest(model1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.44303 0.20854 2.1244 0.0336368 *
## ma1 -0.61232 0.18035 -3.3952 0.0006857 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model2)
## Series: datatsXL
## ARIMA(1,1,3)
##
## Coefficients:
## ar1 ma1 ma2 ma3
## -0.0001 -0.1612 -0.0027 -0.1590
## s.e. 0.2844 0.2755 0.0900 0.0696
##
## sigma^2 = 2006: log likelihood = -1036.98
## AIC=2083.95 AICc=2084.26 BIC=2100.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2598156 44.22235 30.44917 -0.03526897 1.327269 0.9710554
## ACF1
## Training set -0.0007224278
lmtest::coeftest(model2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.00013549 0.28437702 -0.0005 0.99962
## ma1 -0.16122624 0.27553054 -0.5851 0.55845
## ma2 -0.00265208 0.09000067 -0.0295 0.97649
## ma3 -0.15896433 0.06963234 -2.2829 0.02244 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model3)
## Series: datatsXL
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 0.0611 -0.0315 -0.1834 -0.2246
## s.e. 0.2303 0.0776 0.0721 0.2280
##
## sigma^2 = 1992: log likelihood = -1036.31
## AIC=2082.62 AICc=2082.93 BIC=2099.09
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2566756 44.07081 30.49014 -0.03517357 1.329117 0.9723617
## ACF1
## Training set -0.005370386
lmtest::coeftest(model3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.061128 0.230309 0.2654 0.79069
## ar2 -0.031486 0.077580 -0.4058 0.68485
## ar3 -0.183394 0.072149 -2.5419 0.01103 *
## ma1 -0.224573 0.227964 -0.9851 0.32456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model4)
## Series: datatsXL
## ARIMA(3,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## 0.5844 -0.369 -0.5282 -0.7666 0.4879 0.3773
## s.e. 0.3484 0.391 0.3346 0.3778 0.4504 0.3620
##
## sigma^2 = 1946: log likelihood = -1033.66
## AIC=2081.32 AICc=2081.9 BIC=2104.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2422154 43.33057 30.50917 -0.03057183 1.331272 0.9729689
## ACF1
## Training set -0.005055608
lmtest::coeftest(model4)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.58436 0.34838 1.6774 0.09347 .
## ar2 -0.36899 0.39104 -0.9436 0.34537
## ar3 -0.52825 0.33459 -1.5788 0.11438
## ma1 -0.76660 0.37780 -2.0291 0.04245 *
## ma2 0.48794 0.45036 1.0835 0.27861
## ma3 0.37727 0.36204 1.0421 0.29738
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aicmodelXL <- data.frame(Model = c("Arima 1", "Arima 2", "Arima 3", "Arima 4"),
                         p.d.q = c("1,1,1", "1,1,3", "3,1,1", "3,1,3"),
                         AIC   = c(AIC(model1), AIC(model2), AIC(model3), AIC(model4)))
aicmodelXL
## Model p.d.q AIC
## 1 Arima 1 1,1,1 2083.705
## 2 Arima 2 1,1,3 2083.954
## 3 Arima 3 3,1,1 2082.622
## 4 Arima 4 3,1,3 2081.317
Based on the smallest AIC value, the best model is ARIMA model 4, i.e. ARIMA(3,1,3), with an AIC of 2081.317.
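The same selection can be made programmatically from the comparison table above (a minimal sketch):

# Pick the candidate with the smallest AIC
aicmodelXL[which.min(aicmodelXL$AIC), ]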
ULB <- Arima(datatsXL, order = c(3,1,3))
checkresiduals(ULB)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,3)
## Q* = 6.942, df = 4, p-value = 0.139
##
## Model df: 6. Total lags used: 10
The Ljung-Box p-value (0.139) is greater than alpha = 0.05, so we fail to reject H0: the residuals behave like white noise with no significant autocorrelation, which indicates the fitted model is adequate.
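The nortest package loaded at the top is not used in the original output; as an assumed extension (a sketch, not part of the original analysis), the ARIMA(3,1,3) residuals could also be checked for normality:

# Sketch: residual normality checks (assumed extension, not in the original output)
res_ulb <- residuals(ULB)
shapiro.test(res_ulb)      # Shapiro-Wilk test from base R
nortest::ad.test(res_ulb)  # Anderson-Darling test from nortest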
frcst <- forecast(model4, h = 10)
frcst
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 201 2273.295 2216.766 2329.823 2186.842 2359.748
## 202 2270.129 2197.105 2343.152 2158.449 2381.808
## 203 2265.869 2179.066 2352.673 2133.115 2398.624
## 204 2262.808 2166.111 2359.506 2114.922 2410.694
## 205 2264.264 2157.496 2371.031 2100.976 2427.551
## 206 2268.494 2151.415 2385.572 2089.438 2447.549
## 207 2272.045 2144.355 2399.735 2076.760 2467.330
## 208 2271.791 2134.571 2409.012 2061.930 2481.652
## 209 2268.098 2122.960 2413.236 2046.128 2490.067
## 210 2264.157 2112.417 2415.896 2032.091 2496.222
plot(frcst)
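To read the forecast numerically alongside the plot, a small sketch that tabulates the point forecasts and 95% prediction intervals (fc_tab is an illustrative name, not part of the original script):

# Sketch: tabulate the 10-step-ahead forecasts with 95% prediction intervals
fc_tab <- data.frame(step  = seq_along(frcst$mean),
                     point = as.numeric(frcst$mean),
                     lo95  = as.numeric(frcst$lower[, "95%"]),
                     hi95  = as.numeric(frcst$upper[, "95%"]))
kable(fc_tab, digits = 2)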