title: “Topik Dalam Statistika 1” author: “Mika Adi Saputra” date: “2026-05-29” output: html_document —
# =========================================================
# IMPORT LIBRARY
# =========================================================
library(readr)
## Warning: package 'readr' was built under R version 4.5.3
library(portes)
## Warning: package 'portes' was built under R version 4.5.3
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# =========================================================
# MEMBACA DATA
# =========================================================
data <- read_csv("C:/Topik Statistik 1/AirPassengers.csv")
## Rows: 144 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): #Passengers
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 2
## Month `#Passengers`
## <chr> <dbl>
## 1 1949-01 112
## 2 1949-02 118
## 3 1949-03 132
## 4 1949-04 129
## 5 1949-05 121
## 6 1949-06 135
tail(data)
## # A tibble: 6 × 2
## Month `#Passengers`
## <chr> <dbl>
## 1 1960-07 622
## 2 1960-08 606
## 3 1960-09 508
## 4 1960-10 461
## 5 1960-11 390
## 6 1960-12 432
str(data)
## spc_tbl_ [144 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Month : chr [1:144] "1949-01" "1949-02" "1949-03" "1949-04" ...
## $ #Passengers: num [1:144] 112 118 132 129 121 135 148 148 136 119 ...
## - attr(*, "spec")=
## .. cols(
## .. Month = col_character(),
## .. `#Passengers` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Mengubah nama kolom
colnames(data)[2] <- "Passengers"
length(data$Passengers)
## [1] 144
# Mengubah format tanggal
data$Month <- as.Date(
paste0(data$Month, "-01")
)
# =========================================================
# FORMING DATA TIME SERIES
# =========================================================
data.ts <- ts(data$Passengers,
start = c(1949,1),
frequency = 12)
# =========================================================
# PLOT
# =========================================================
# Menggunakan function ts.plot
ts.plot(data.ts,
type = "l",
ylab = "Jumlah Penumpang",
col = "blue")
title(main = "Plot Time Series AirPassengers",
cex.sub = 0.8)
points(data.ts,
pch = 20,
col = "blue")
# =========================================================
# Menggunakan function ggplot
# =========================================================
ggplot(data,
aes(x = Month,
y = Passengers)) +
geom_line(color = "blue") +
labs(title = "Data AirPassengers",
x = "Tahun",
y = "Jumlah Penumpang")
# =========================================================
# VISUALISASI PER MUSIM
# =========================================================
seasonplot(data.ts,
12,
main = "Seasonal Plot of AirPassengers",
ylab = "Passengers",
year.labels = TRUE,
col = rainbow(20))
# =========================================================
# VISUALISASI DESKRIPTIF
# =========================================================
monthplot(data.ts,
ylab = "Jumlah Penumpang",
col = "blue")
# =========================================================
# IDENTIFIKASI MODEL
# =========================================================
boxplot(data.ts,
col = "blue",
main = "Boxplot Data AirPassengers")
# =========================================================
# SPLITTING DATA
# =========================================================
train.ts <- window(data.ts,
end = c(1957,12))
test.ts <- window(data.ts,
start = c(1958,1))
# =========================================================
# VISUALISASI DATA TRAINING
# =========================================================
ts.plot(train.ts,
ylab = "Jumlah Penumpang",
xlab = "Time",
col = "blue")
title(main = "Plot Data Train AirPassengers",
cex.sub = 0.8)
points(train.ts,
pch = 20,
col = "blue")
# =========================================================
# VISUALISASI DATA TESTING
# =========================================================
ts.plot(test.ts,
ylab = "Jumlah Penumpang",
xlab = "Time",
col = "blue")
title(main = "Plot Data Test AirPassengers",
cex.sub = 0.8)
points(test.ts,
pch = 20,
col = "blue")
# =========================================================
# ARIMA NON-SEASONAL
# =========================================================
# Identifikasi Kestasioneran Data
acf0 <- acf(train.ts,
main = "ACF",
lag.max = 50,
xaxt = "n",
col = "blue")
axis(1,
at = 0:50/12,
labels = 0:50)
acf0$lag <- acf0$lag * 12
acf0.1 <- as.data.frame(cbind(acf0$acf,
acf0$lag))
acf0.2 <- acf0.1[which(acf0.1$V2 %% 12 == 0), ]
barplot(height = acf0.2$V1,
names.arg = acf0.2$V2,
ylab = "ACF",
xlab = "Lag")
# =========================================================
# DIFFERENCING DATA
# =========================================================
d1 <- diff(train.ts)
ts.plot(d1,
type = "l",
ylab = "Differencing 1",
col = "blue")
# =========================================================
# IDENTIFIKASI KESTASIONERAN DATA HASIL DIFFERENCING
# =========================================================
acf1 <- acf(d1,
lag.max = 50,
xaxt = "n",
main = "ACF d1",
col = "blue")
axis(1,
at = 0:50/12,
labels = 0:50)
acf1$lag <- acf1$lag * 12
acf1.1 <- as.data.frame(cbind(acf1$acf,
acf1$lag))
acf1.2 <- acf1.1[which(acf1.1$V2 %% 12 == 0), ]
barplot(height = acf1.2$V1,
names.arg = acf1.2$V2,
ylab = "ACF",
xlab = "Lag")
# =========================================================
# ARIMA SEASONAL
# =========================================================
# Differencing Data Seasonal
D1 <- diff(train.ts,
12)
ts.plot(D1,
type = "l",
ylab = "Seasonal Differencing",
col = "blue")
acf2 <- acf(D1,
lag.max = 48,
xaxt = "n",
main = "ACF D1",
col = "blue")
acf2$lag <- acf2$lag * 12
acf2.1 <- as.data.frame(cbind(acf2$acf,
acf2$lag))
acf2.2 <- acf2.1[which(acf2.1$V2 %% 12 == 0), ]
barplot(height = acf2.2$V1,
names.arg = acf2.2$V2,
ylab = "ACF",
xlab = "Lag")
# =========================================================
# DIFFERENCING GABUNGAN
# =========================================================
d1D1 <- diff(D1)
ts.plot(d1D1,
type = "l",
ylab = "d1D1",
col = "blue")
# =========================================================
# IDENTIFIKASI MODEL
# =========================================================
acf3 <- acf(d1D1,
lag.max = 24,
xaxt = "n",
main = "ACF d1D1",
col = "blue")
axis(1,
at = 0:24/12,
labels = 0:24)
pacf3 <- pacf(d1D1,
lag.max = 24,
xaxt = "n",
main = "PACF d1D1",
col = "blue")
axis(1,
at = 0:24/12,
labels = 0:24)
acf3$lag <- acf3$lag * 12
acf3.1 <- as.data.frame(cbind(acf3$acf,
acf3$lag))
acf3.2 <- acf3.1[which(acf3.1$V2 %% 12 == 0), ]
barplot(height = acf3.2$V1,
names.arg = acf3.2$V2,
ylab = "ACF",
xlab = "Lag")
pacf3$lag <- pacf3$lag * 12
pacf3.1 <- as.data.frame(cbind(pacf3$acf,
pacf3$lag))
pacf3.2 <- pacf3.1[which(pacf3.1$V2 %% 12 == 0), ]
barplot(height = pacf3.2$V1,
names.arg = pacf3.2$V2,
ylab = "PACF",
xlab = "Lag")
# =========================================================
# EACF
# =========================================================
eacf(d1D1)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o o o o o
## 1 x o x o o o o o o o o o o o
## 2 x x x o o o o o o o o o o o
## 3 x x x o o o o o o o o o o o
## 4 o o x o o o o o o o o o o o
## 5 x o x o o o o o o o o o o o
## 6 o o o o o o o o o o o o o o
## 7 x x x x o o o o o o o o o o
# =========================================================
# PEMBENTUKAN MODEL ARIMA
# =========================================================
# Model ARIMA Non-Seasonal
tmodel1 <- Arima(train.ts,
order = c(0,1,1))
summary(tmodel1)
## Series: train.ts
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.4339
## s.e. 0.1134
##
## sigma^2 = 593.2: log likelihood = -493.05
## AIC=990.1 AICc=990.22 BIC=995.45
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.56673 24.12835 19.65591 0.4518527 8.520933 0.6429189 -0.05727306
# Model ARIMA Seasonal
tmodel2 <- Arima(train.ts,
order = c(0,1,1),
seasonal = list(order = c(0,1,1),
period = 12))
summary(tmodel2)
## Series: train.ts
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.2097 -0.1232
## s.e. 0.1080 0.0974
##
## sigma^2 = 93.39: log likelihood = -349.4
## AIC=704.8 AICc=705.07 BIC=712.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3681975 8.967841 6.707961 0.09346515 2.878977 0.2194086
## ACF1
## Training set -0.006128396
# =========================================================
# MODEL TERBAIK
# =========================================================
AICKandidatModel <- c(tmodel1$aic,
tmodel2$aic)
AICcKandidatModel <- c(tmodel1$aicc,
tmodel2$aicc)
BICKandidatModel <- c(tmodel1$bic,
tmodel2$bic)
KandidatModelARIMA <- c("ARIMA(0,1,1)",
"ARIMA(0,1,1)(0,1,1)12")
compmodelARIMA <- cbind(KandidatModelARIMA,
AICKandidatModel,
AICcKandidatModel,
BICKandidatModel)
colnames(compmodelARIMA) <- c("Kandidat Model",
"Nilai AIC",
"Nilai AICc",
"Nilai BIC")
compmodelARIMA <- as.data.frame(compmodelARIMA)
compmodelARIMA
## Kandidat Model Nilai AIC Nilai AICc Nilai BIC
## 1 ARIMA(0,1,1) 990.101617403295 990.21700201868 995.447275072219
## 2 ARIMA(0,1,1)(0,1,1)12 704.801449139834 705.06518540357 712.463079814636
# =========================================================
# DIBANDINGKAN DENGAN AUTO.ARIMA
# =========================================================
auto.arima(train.ts,
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
## Series: train.ts
## ARIMA(0,1,3)(0,1,0)[12]
##
## Coefficients:
## ma1 ma2 ma3
## -0.2753 0.0964 -0.3059
## s.e. 0.0994 0.1132 0.1103
##
## sigma^2 = 88.95: log likelihood = -346.64
## AIC=701.29 AICc=701.73 BIC=711.5
# =========================================================
# PENGUJIAN PARAMETER MODEL
# =========================================================
printstatarima <- function (x,
digits = 4,
se = TRUE){
if(length(x$coef) > 0){
cat("\nCoefficients:\n")
coef <- round(x$coef,
digits = digits)
if(se && nrow(x$var.coef)){
ses <- rep(0, length(coef))
ses[x$mask] <- round(sqrt(diag(x$var.coef)),
digits = digits)
coef <- matrix(coef,
1,
dimnames = list(NULL,
names(coef)))
coef <- rbind(coef,
s.e. = ses)
statt <- coef[1,] / ses
pval <- 2 * pt(abs(statt),
df = length(x$residuals)-1,
lower.tail = FALSE)
coef <- rbind(coef,
t = round(statt,
digits = digits),
sign. = round(pval,
digits = digits))
coef <- t(coef)
}
print.default(coef,
print.gap = 2)
}
}
printstatarima(tmodel2)
##
## Coefficients:
## s.e. t sign.
## ma1 -0.2097 0.1080 -1.9417 0.0548
## sma1 -0.1232 0.0974 -1.2649 0.2087
# =========================================================
# DIAGNOSTIC MODEL
# =========================================================
tsdisplay(residuals(tmodel2),
lag.max = 45,
main = 'ARIMA Seasonal Model Residuals',
col = "blue")
Box.test(residuals(tmodel2),
lag = 12,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(tmodel2)
## X-squared = 9.6876, df = 12, p-value = 0.6433
Box.test(residuals(tmodel2),
lag = 24,
type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(tmodel2)
## X-squared = 37.012, df = 24, p-value = 0.04364
jarque.bera.test(residuals(tmodel2))
##
## Jarque Bera Test
##
## data: residuals(tmodel2)
## X-squared = 3.3041, df = 2, p-value = 0.1917
# =========================================================
# FORECASTING
# =========================================================
ramalan_arima2 <- forecast(tmodel2,
h = length(test.ts))
ramalan_arima2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1958 346.0770 333.6921 358.4620 327.1359 365.0182
## Feb 1958 332.9001 317.1143 348.6860 308.7577 357.0425
## Mar 1958 385.9825 367.4083 404.5567 357.5758 414.3893
## Apr 1958 378.5491 357.5537 399.5445 346.4395 410.6588
## May 1958 385.2504 362.0855 408.4153 349.8227 420.6781
## Jun 1958 450.6965 425.5486 475.8445 412.2360 489.1570
## Jul 1958 493.3307 466.3451 520.3164 452.0597 534.6018
## Aug 1958 493.9807 465.2747 522.6867 450.0787 537.8827
## Sep 1958 432.8123 402.4834 463.1411 386.4283 479.1963
## Oct 1958 376.9813 345.1121 408.8505 328.2416 425.7210
## Nov 1958 335.8333 302.4949 369.1718 284.8466 386.8201
## Dec 1958 367.3882 332.6426 402.1338 314.2493 420.5271
## Jan 1959 377.2977 336.8805 417.7149 315.4850 439.1105
## Feb 1959 364.1208 319.7248 408.5168 296.2230 432.0186
## Mar 1959 417.2032 369.1568 465.2496 343.7225 490.6839
## Apr 1959 409.7698 358.3314 461.2083 331.1015 488.4382
## May 1959 416.4711 361.8509 471.0913 332.9367 500.0055
## Jun 1959 481.9172 424.2906 539.5438 393.7849 570.0495
## Jul 1959 524.5514 464.0677 585.0351 432.0496 617.0533
## Aug 1959 525.2014 461.9895 588.4132 428.5272 621.8755
## Sep 1959 464.0330 398.2060 529.8599 363.3593 564.7066
## Oct 1959 408.2020 339.8599 476.5441 303.6817 512.7222
## Nov 1959 367.0540 296.2861 437.8220 258.8238 475.2842
## Dec 1959 398.6089 325.4956 471.7222 286.7917 510.4260
## Jan 1960 408.5184 329.7792 487.2577 288.0971 528.9397
## Feb 1960 395.3415 312.1173 478.5657 268.0611 522.6219
## Mar 1960 448.4239 360.9444 535.9034 314.6355 582.2123
## Apr 1960 440.9905 349.4533 532.5278 300.9964 580.9847
## May 1960 447.6918 352.2692 543.1144 301.7555 593.6280
## Jun 1960 513.1379 413.9821 612.2937 361.4922 664.7836
## Jul 1960 555.7721 453.0187 658.5255 398.6243 712.9199
## Aug 1960 556.4220 450.1927 662.6514 393.9583 718.8858
## Sep 1960 495.2536 385.6586 604.8487 327.6425 662.8648
## Oct 1960 439.4227 326.5623 552.2831 266.8176 612.0277
## Nov 1960 398.2747 282.2408 514.3086 220.8162 575.7333
## Dec 1960 429.8296 310.7067 548.9525 247.6468 612.0123
prediksi <- forecast(tmodel2,
h = length(test.ts))
accuracy(prediksi,
test.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3681975 8.967841 6.707961 0.09346515 2.878977 0.2194086
## Test set -1.0358744 22.562641 17.988540 -1.00841076 4.178255 0.5883816
## ACF1 Theil's U
## Training set -0.006128396 NA
## Test set 0.622144156 0.4562945
plot(ramalan_arima2,
col = "blue")
forecast_arima2 <- cbind(ramalan_arima2$mean,
ramalan_arima2$lower,
ramalan_arima2$upper)
forecast_arima2
## ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## Jan 1958 346.0770 333.6921 327.1359
## Feb 1958 332.9001 317.1143 308.7577
## Mar 1958 385.9825 367.4083 357.5758
## Apr 1958 378.5491 357.5537 346.4395
## May 1958 385.2504 362.0855 349.8227
## Jun 1958 450.6965 425.5486 412.2360
## Jul 1958 493.3307 466.3451 452.0597
## Aug 1958 493.9807 465.2747 450.0787
## Sep 1958 432.8123 402.4834 386.4283
## Oct 1958 376.9813 345.1121 328.2416
## Nov 1958 335.8333 302.4949 284.8466
## Dec 1958 367.3882 332.6426 314.2493
## Jan 1959 377.2977 336.8805 315.4850
## Feb 1959 364.1208 319.7248 296.2230
## Mar 1959 417.2032 369.1568 343.7225
## Apr 1959 409.7698 358.3314 331.1015
## May 1959 416.4711 361.8509 332.9367
## Jun 1959 481.9172 424.2906 393.7849
## Jul 1959 524.5514 464.0677 432.0496
## Aug 1959 525.2014 461.9895 428.5272
## Sep 1959 464.0330 398.2060 363.3593
## Oct 1959 408.2020 339.8599 303.6817
## Nov 1959 367.0540 296.2861 258.8238
## Dec 1959 398.6089 325.4956 286.7917
## Jan 1960 408.5184 329.7792 288.0971
## Feb 1960 395.3415 312.1173 268.0611
## Mar 1960 448.4239 360.9444 314.6355
## Apr 1960 440.9905 349.4533 300.9964
## May 1960 447.6918 352.2692 301.7555
## Jun 1960 513.1379 413.9821 361.4922
## Jul 1960 555.7721 453.0187 398.6243
## Aug 1960 556.4220 450.1927 393.9583
## Sep 1960 495.2536 385.6586 327.6425
## Oct 1960 439.4227 326.5623 266.8176
## Nov 1960 398.2747 282.2408 220.8162
## Dec 1960 429.8296 310.7067 247.6468
## ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## Jan 1958 358.4620 365.0182
## Feb 1958 348.6860 357.0425
## Mar 1958 404.5567 414.3893
## Apr 1958 399.5445 410.6588
## May 1958 408.4153 420.6781
## Jun 1958 475.8445 489.1570
## Jul 1958 520.3164 534.6018
## Aug 1958 522.6867 537.8827
## Sep 1958 463.1411 479.1963
## Oct 1958 408.8505 425.7210
## Nov 1958 369.1718 386.8201
## Dec 1958 402.1338 420.5271
## Jan 1959 417.7149 439.1105
## Feb 1959 408.5168 432.0186
## Mar 1959 465.2496 490.6839
## Apr 1959 461.2083 488.4382
## May 1959 471.0913 500.0055
## Jun 1959 539.5438 570.0495
## Jul 1959 585.0351 617.0533
## Aug 1959 588.4132 621.8755
## Sep 1959 529.8599 564.7066
## Oct 1959 476.5441 512.7222
## Nov 1959 437.8220 475.2842
## Dec 1959 471.7222 510.4260
## Jan 1960 487.2577 528.9397
## Feb 1960 478.5657 522.6219
## Mar 1960 535.9034 582.2123
## Apr 1960 532.5278 580.9847
## May 1960 543.1144 593.6280
## Jun 1960 612.2937 664.7836
## Jul 1960 658.5255 712.9199
## Aug 1960 662.6514 718.8858
## Sep 1960 604.8487 662.8648
## Oct 1960 552.2831 612.0277
## Nov 1960 514.3086 575.7333
## Dec 1960 548.9525 612.0123