Jangan lupa, untuk memasukan library-library yang dibutuhkan.
library("tseries")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library("forecast")
library("TTR")
library("TSA")
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library("graphics")
library("astsa")
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library("car")
## Loading required package: carData
library("portes")
library(readxl)
USpass<-read_excel("D:/TUGAS/R/datalatihan.xlsx")
class(USpass)
## [1] "tbl_df" "tbl" "data.frame"
head(USpass)
## # A tibble: 6 × 1
## `Sales (in thousands)`
## <dbl>
## 1 10618.
## 2 10538.
## 3 10209.
## 4 10553
## 5 9935.
## 6 10534.
Di sini dilakukan perubahan data USpass dari data.frame menjadi data ts atau Time Series
USpass.ts <- ts(USpass, frequency = 12, start = c(1960,1), end = c(1977,12))
class(USpass.ts)
## [1] "ts"
#1. Plot Time Series
# Plot untuk data penjualan tunggal
plot.ts(USpass.ts, main = "Monthly Sales Data", xlab = "Year", ylab = "Sales (in thousand)")
Plot data asal memperlihatkan pola musiman dengan s = 12, serta terdapat perilaku nonstasioner baik dalam rataan maupun ragam.
seasonplot(USpass.ts, 12, main = "Monthly Sales Data", ylab = "Sales (in thousand)",
year.labels = TRUE, col = rainbow(length(unique(floor(time(USpass.ts)))))
)
monthplot(USpass.ts, ylab = "Sales (in thousand)", main = "Average Monthly Sales")
# Boxplot bulanan untuk data penjualan
boxplot(USpass.ts ~ cycle(USpass.ts), xlab = "Month", ylab = "Sales (in thousand)",
main = "Monthly Sales Data - Boxplot")
Boxplot menunjukkan ukuran keragaman (panjang box) dan pemusatan
(median) yang berbeda pada masing-masing bulan # Identifikasi Model ##
Transformasi Logaritma
lnAPM=log(USpass.ts)
monthplot(lnAPM,ylab="Air Passenger (miles)")
boxplot(lnAPM ~ cycle(lnAPM), xlab = "Month", ylab = "ln(APM)", main = "Monthly U.S. Air Passenger Data - Boxplot")
Boxplot menunjukkan ukuran keragaman (panjang box) hampir sama akan
tetapi ukuran pemusatan (median) yang berbeda pada masing-masing
bulan
fligner.test(USpass.ts~cycle(USpass.ts))
##
## Fligner-Killeen test of homogeneity of variances
##
## data: USpass.ts by cycle(USpass.ts)
## Fligner-Killeen:med chi-squared = 21.074, df = 11, p-value = 0.03261
Nilai-p kurang dari 5% sehinga tolak H0 dengan kata lain ragam tidak homogen
fligner.test(USpass.ts~cycle(USpass.ts))
##
## Fligner-Killeen test of homogeneity of variances
##
## data: USpass.ts by cycle(USpass.ts)
## Fligner-Killeen:med chi-squared = 21.074, df = 11, p-value = 0.03261
Nilai-p lebih dari 5% sehingga tidak tolak H0 dengan kata lain ragam data setelah di ln kan homogen ## Pembagian data lnAPM menjadi training-testing Data setahun terakhir digunakan untuk data testing sementara data sebelumnya untuk membangun model
lpass<- subset(lnAPM,start=1,end=204)
test <- subset(lnAPM,start=205,end=216)
acf0<-acf(lpass,main="ACF",lag.max=48,xaxt="n")
axis(1, at=0:48/12, labels=0:48)
acf0$lag=acf0$lag*12
acf0.1 <- as.data.frame(cbind(acf0$acf,acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2%%12==0),]
barplot(height = acf0.2$V1, names.arg=acf0.2$V2, ylab="ACF", xlab="Lag")
Plot ACF menunjukkan data ln US Pass tidak stasioner baik di series non musiman maupun musiman
diff1.lpass=diff(lpass)
plot(diff1.lpass, main = "Time series plot of lpas d=1")
Differencing non musiman d = 1 berhasil mengatasi ketidakstasioneran
dalam rataan untuk komponen non musimannya.
acf1 <- acf(diff1.lpass,lag.max=48,xaxt="n", main="ACF d1")
axis(1, at=0:48/12, labels=0:48)
acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf,acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2%%12==0),]
barplot(height=acf1.2$V1,names.arg=acf1.2$V2,ylab="ACF",xlab="lag")
Plot ACF data nonseasonal differencing d = 1, mengkonfirmasi
kestasioneran komponen non musiman (namun perhatikan lag 12,24, dst),
pada series musiman belum stasioner
diff12.lpass <- diff(lpass,12)
plot(diff12.lpass, main = "Time series plot of ln(APM) D=12")
axis(1, at=0:48/12, labels=0:48)
acf2<- acf(diff12.lpass,lag.max=48,xaxt="n", main="ACF d1D1")
acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf,acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2%%12==0),]
barplot(height=acf2.2$V1,names.arg=acf2.2$V2,ylab="ACF", xlab="Lag")
Nonseasonal differencing D = 12 berhasil mengatasi ketidakstasioneran
dalam rataan untuk komponen seasonalnya (namun tidak untuk komponen non
musimannya).
diff12.1lpass <- diff(diff12.lpass,1)
plot(diff12.1lpass, main = "Time series plot of ln(APM) d=1, D=12")
## Identifikasi Model
acf2(diff12.1lpass,48)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.41 0.09 -0.23 0.02 -0.04 -0.04 0.08 0.06 0.05 -0.05 0.23 -0.48
## PACF -0.41 -0.09 -0.27 -0.23 -0.19 -0.29 -0.21 -0.12 -0.06 -0.06 0.37 -0.22
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.13 -0.04 0.10 0.05 -0.07 0.15 -0.07 -0.08 0.00 -0.02 0.02 0.06
## PACF -0.20 -0.01 -0.19 -0.12 -0.19 -0.12 -0.06 -0.13 -0.05 -0.11 0.15 -0.06
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.00 0.03 0.02 -0.10 0.05 -0.14 0.04 0.10 0.05 -0.06 -0.01 -0.04
## PACF -0.13 0.00 0.02 -0.03 -0.06 -0.10 -0.13 -0.02 0.14 -0.13 0.12 0.04
## [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF 0.04 -0.04 -0.07 0.09 0.06 -0.03 0.10 -0.20 0.02 0.04 0.00 0.00
## PACF -0.07 0.09 -0.11 -0.11 0.12 -0.15 0.07 -0.03 -0.03 0.07 0.05 -0.11
Kedua komponen telah stasioner. Identifikasi komponen non musiman adalah ARIMA(0,1,2) . Identifikasi komponen musiman adalah ARIMA(0,1,1)12 , sehingga model tentatif adalah ARIMA(0,1,2)×(0,1,1)12 .
eacf(diff12.1lpass)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o x x o o
## 1 x x x x o o o o o o o x o o
## 2 x x x o o o o o o o o x o o
## 3 x o x o o o o o o o o x x x
## 4 x o x o o o o o o o o x o o
## 5 x x x x o o o o o o o x o x
## 6 x x o o x o o o o o o x x o
## 7 x o o o x o o o o o o x o o
auto.arima(lpass)
## Series: lpass
## ARIMA(2,0,2)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1 mean
## 1.2168 -0.7331 -1.0939 0.5399 -0.0194 9.2470
## s.e. 0.1446 0.1528 0.1762 0.1861 0.0792 0.0012
##
## sigma^2 = 0.0004248: log likelihood = 505.37
## AIC=-996.75 AICc=-996.18 BIC=-973.52
ARIMA(0,1,2) × (0,1,1)12
model1 <- Arima(lpass,order=c(0,1,2),seasonal=c(0,1,1))
summary(model1)
## Series: lpass
## ARIMA(0,1,2)(0,1,1)[12]
##
## Coefficients:
## ma1 ma2 sma1
## -0.8389 -0.1611 -1.0000
## s.e. 0.0667 0.0627 0.0616
##
## sigma^2 = 0.0004305: log likelihood = 450.88
## AIC=-893.76 AICc=-893.54 BIC=-880.75
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0003240709 0.01991827 0.01580201 -0.003916568 0.170899
## MASE ACF1
## Training set 0.6346118 0.01782754
ARIMA(2,1,1) × (1,1,1)12
model2<-Arima(lpass,order=c(2,1,1),seasonal=c(1,1,1))
summary(model2)
## Series: lpass
## ARIMA(2,1,1)(1,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1
## 0.1881 -0.0220 -1.0000 -0.1349 -1.0000
## s.e. 0.0725 0.0736 0.0236 0.0784 0.0669
##
## sigma^2 = 0.0004201: log likelihood = 452.65
## AIC=-893.3 AICc=-892.84 BIC=-873.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0001950609 0.01957233 0.01559679 -0.002509951 0.1686852
## MASE ACF1
## Training set 0.6263701 -0.006421489
ARIMA(0,1,2) × (0,1,1)12
model3<-Arima(lpass,order=c(1,1,2),seasonal=c(1,1,1))
summary(model3)
## Series: lpass
## ARIMA(1,1,2)(1,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 sar1 sma1
## 0.1588 -0.9732 -0.0268 -0.1329 -1.0000
## s.e. 0.1970 0.1896 0.1882 0.0780 0.0668
##
## sigma^2 = 0.0004205: log likelihood = 452.62
## AIC=-893.23 AICc=-892.77 BIC=-873.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set -0.0001789155 0.01957988 0.01562929 -0.002335584 0.1690369
## MASE ACF1
## Training set 0.6276754 0.001679716
Di sini digunakan User Defined Function “printstatarima”
printstatarima <- function (x, digits = 4,se=TRUE){
if (length(x$coef) > 0) {
cat("\nCoefficients:\n")
coef <- round(x$coef, digits = digits)
if (se && nrow(x$var.coef)) {
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)), digits = digits)
coef <- matrix(coef, 1, dimnames = list(NULL, names(coef)))
coef <- rbind(coef, s.e. = ses)
statt <- coef[1,]/ses
pval <- 2*pt(abs(statt), df=length(x$residuals)-1, lower.tail = FALSE)
coef <- rbind(coef, t=round(statt,digits=digits),sign.=round(pval,digits=digits))
coef <- t(coef)
}
print.default(coef, print.gap = 2)
}
}
printstatarima(model1)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.8389 0.0667 -12.5772 0.0000
## ma2 -0.1611 0.0627 -2.5694 0.0109
## sma1 -1.0000 0.0616 -16.2338 0.0000
printstatarima(model2)
##
## Coefficients:
## s.e. t sign.
## ar1 0.1881 0.0725 2.5945 0.0102
## ar2 -0.0220 0.0736 -0.2989 0.7653
## ma1 -1.0000 0.0236 -42.3729 0.0000
## sar1 -0.1349 0.0784 -1.7207 0.0868
## sma1 -1.0000 0.0669 -14.9477 0.0000
printstatarima(model3)
##
## Coefficients:
## s.e. t sign.
## ar1 0.1588 0.1970 0.8061 0.4211
## ma1 -0.9732 0.1896 -5.1329 0.0000
## ma2 -0.0268 0.1882 -0.1424 0.8869
## sar1 -0.1329 0.0780 -1.7038 0.0899
## sma1 -1.0000 0.0668 -14.9701 0.0000
Model terbaik adalah model 1 karena semua dugaan parameternya berpengaruh nyata. Jika ada beberapa model yang semua dugaan parameternya nyata maka yang dipilih adalah yang nilai keakuratannya paling tinggi
#checkresiduals(model1,lag=c(10,30))
Residual telah menyebar normal dan tidak terdapat autokorelasi
tsdisplay(residuals(model1), lag.max=45, main='ARIMA(0,1,2)(0,1,1)12 Model Residuals')
library(portes)
ljbtest <- LjungBox(residuals(model1),lags=seq(5,30,5))
ljbtest
## lags statistic df p-value
## 5 20.48645 5 0.001012458
## 10 24.55089 10 0.006265017
## 15 27.77288 15 0.023034753
## 20 33.17072 20 0.032313293
## 25 37.49339 25 0.051797969
## 30 44.67411 30 0.041381099
Tidak terdapat autokorelasi pada sisaan
library(tseries)
jarque.bera.test(residuals(model1))
##
## Jarque Bera Test
##
## data: residuals(model1)
## X-squared = 1.2282, df = 2, p-value = 0.5411
Sisaan tidak menyebar normal
##Validasi Model
ramalan_arima = forecast(model1, 12)
accuracy(ramalan_arima,test)
## ME RMSE MAE MPE MAPE
## Training set -0.0003240709 0.01991827 0.01580201 -0.003916568 0.1708990
## Test set -0.0026494849 0.01682258 0.01321211 -0.028979995 0.1428423
## MASE ACF1 Theil's U
## Training set 0.6346118 0.01782754 NA
## Test set 0.5306009 -0.16044177 0.5859166
plot(ramalan_arima)
forecast_arima <- cbind(ramalan_arima$mean,ramalan_arima$lower,ramalan_arima$upper)
forecast_arima <- cbind(USpass[205:216, 1], exp(forecast_arima))
forecast_arima <- as.data.frame(forecast_arima)
str(forecast_arima)
## 'data.frame': 12 obs. of 6 variables:
## $ Sales (in thousands) : num NA NA NA NA NA NA NA NA NA NA ...
## $ ramalan_arima$mean : num 10437 10324 10255 10423 10282 ...
## $ ramalan_arima$lower.80%: num 10153 10039 9972 10136 9998 ...
## $ ramalan_arima$lower.95%: num 10006 9891 9826 9987 9851 ...
## $ ramalan_arima$upper.80%: num 10729 10617 10546 10719 10574 ...
## $ ramalan_arima$upper.95%: num 10886 10776 10704 10879 10732 ...
ncol(forecast_arima)
## [1] 6
# Tentukan range untuk x dan y
x_range <- 1:nrow(forecast_arima)
y_min <- min(forecast_arima[,4], na.rm=TRUE) # kolom lower.95%
y_max <- max(forecast_arima[,2], na.rm=TRUE) # kolom mean
# Buat plot
plot(x_range,
forecast_arima[,2], # kolom mean
type = "l",
col = "black",
xlim = c(min(x_range), max(x_range)),
ylim = c(y_min, y_max),
xlab = "Period",
ylab = "Sales (in thousands)",
main = "Forecast Plot with Confidence Intervals")
# Tambahkan garis confidence intervals
lines(x_range, forecast_arima[,3], col='blue', lwd=1) # kolom lower.80%
lines(x_range, forecast_arima[,4], col='red', lwd=1) # kolom lower.95%
# Tambahkan legend
legend("topleft",
legend=c("Mean Forecast", "Lower 80%", "Lower 95%"),
col=c("black", "blue", "red"),
lty=1,
lwd=1,
cex=0.8,
box.lty=0)
head(forecast_arima)
## Sales (in thousands) ramalan_arima$mean ramalan_arima$lower.80%
## 1 NA 10436.83 10152.945
## 2 NA 10323.94 10038.926
## 3 NA 10255.34 9972.216
## 4 NA 10423.43 10135.673
## 5 NA 10281.95 9998.096
## 6 NA 10359.81 10073.806
## ramalan_arima$lower.95% ramalan_arima$upper.80% ramalan_arima$upper.95%
## 1 10005.803 10728.66 10886.43
## 2 9891.248 10617.04 10775.56
## 3 9825.519 10546.49 10703.95
## 4 9986.572 10719.36 10879.41
## 5 9851.019 10573.86 10731.73
## 6 9925.615 10653.93 10813.00