title: “Topik Dalam Statistika 1” author: “Mika Adi Saputra” date: “2026-05-29” output: html_document —

# =========================================================
# IMPORT LIBRARY
# =========================================================
library(readr)
## Warning: package 'readr' was built under R version 4.5.3
library(portes)
## Warning: package 'portes' was built under R version 4.5.3
library(car)
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.3
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# =========================================================
# MEMBACA DATA
# =========================================================
data <- read_csv("C:/Topik Statistik 1/AirPassengers.csv")
## Rows: 144 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Month
## dbl (1): #Passengers
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 2
##   Month   `#Passengers`
##   <chr>           <dbl>
## 1 1949-01           112
## 2 1949-02           118
## 3 1949-03           132
## 4 1949-04           129
## 5 1949-05           121
## 6 1949-06           135
tail(data)
## # A tibble: 6 × 2
##   Month   `#Passengers`
##   <chr>           <dbl>
## 1 1960-07           622
## 2 1960-08           606
## 3 1960-09           508
## 4 1960-10           461
## 5 1960-11           390
## 6 1960-12           432
str(data)
## spc_tbl_ [144 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Month      : chr [1:144] "1949-01" "1949-02" "1949-03" "1949-04" ...
##  $ #Passengers: num [1:144] 112 118 132 129 121 135 148 148 136 119 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Month = col_character(),
##   ..   `#Passengers` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Mengubah nama kolom
colnames(data)[2] <- "Passengers"

length(data$Passengers)
## [1] 144
# Mengubah format tanggal
data$Month <- as.Date(
  paste0(data$Month, "-01")
)
# =========================================================
# FORMING DATA TIME SERIES
# =========================================================
data.ts <- ts(data$Passengers,
              start = c(1949,1),
              frequency = 12)

# =========================================================
# PLOT
# =========================================================

# Menggunakan function ts.plot
ts.plot(data.ts,
        type = "l",
        ylab = "Jumlah Penumpang",
        col = "blue")

title(main = "Plot Time Series AirPassengers",
      cex.sub = 0.8)

points(data.ts,
       pch = 20,
       col = "blue")

# =========================================================
# Menggunakan function ggplot
# =========================================================
ggplot(data,
       aes(x = Month,
           y = Passengers)) +
  geom_line(color = "blue") +
  labs(title = "Data AirPassengers",
       x = "Tahun",
       y = "Jumlah Penumpang")

# =========================================================
# VISUALISASI PER MUSIM
# =========================================================
seasonplot(data.ts,
           12,
           main = "Seasonal Plot of AirPassengers",
           ylab = "Passengers",
           year.labels = TRUE,
           col = rainbow(20))

# =========================================================
# VISUALISASI DESKRIPTIF
# =========================================================
monthplot(data.ts,
          ylab = "Jumlah Penumpang",
          col = "blue")

# =========================================================
# IDENTIFIKASI MODEL
# =========================================================
boxplot(data.ts,
        col = "blue",
        main = "Boxplot Data AirPassengers")

# =========================================================
# SPLITTING DATA
# =========================================================
train.ts <- window(data.ts,
                   end = c(1957,12))

test.ts <- window(data.ts,
                  start = c(1958,1))
# =========================================================
# VISUALISASI DATA TRAINING
# =========================================================
ts.plot(train.ts,
        ylab = "Jumlah Penumpang",
        xlab = "Time",
        col = "blue")

title(main = "Plot Data Train AirPassengers",
      cex.sub = 0.8)

points(train.ts,
       pch = 20,
       col = "blue")

# =========================================================
# VISUALISASI DATA TESTING
# =========================================================
ts.plot(test.ts,
        ylab = "Jumlah Penumpang",
        xlab = "Time",
        col = "blue")

title(main = "Plot Data Test AirPassengers",
      cex.sub = 0.8)

points(test.ts,
       pch = 20,
       col = "blue")

# =========================================================
# ARIMA NON-SEASONAL
# =========================================================

# Identifikasi Kestasioneran Data
acf0 <- acf(train.ts,
            main = "ACF",
            lag.max = 50,
            xaxt = "n",
            col = "blue")

axis(1,
     at = 0:50/12,
     labels = 0:50)

acf0$lag <- acf0$lag * 12

acf0.1 <- as.data.frame(cbind(acf0$acf,
                              acf0$lag))

acf0.2 <- acf0.1[which(acf0.1$V2 %% 12 == 0), ]

barplot(height = acf0.2$V1,
        names.arg = acf0.2$V2,
        ylab = "ACF",
        xlab = "Lag")

# =========================================================
# DIFFERENCING DATA
# =========================================================
d1 <- diff(train.ts)

ts.plot(d1,
        type = "l",
        ylab = "Differencing 1",
        col = "blue")

# =========================================================
# IDENTIFIKASI KESTASIONERAN DATA HASIL DIFFERENCING
# =========================================================
acf1 <- acf(d1,
            lag.max = 50,
            xaxt = "n",
            main = "ACF d1",
            col = "blue")

axis(1,
     at = 0:50/12,
     labels = 0:50)

acf1$lag <- acf1$lag * 12

acf1.1 <- as.data.frame(cbind(acf1$acf,
                              acf1$lag))

acf1.2 <- acf1.1[which(acf1.1$V2 %% 12 == 0), ]

barplot(height = acf1.2$V1,
        names.arg = acf1.2$V2,
        ylab = "ACF",
        xlab = "Lag")

# =========================================================
# ARIMA SEASONAL
# =========================================================

# Differencing Data Seasonal
D1 <- diff(train.ts,
           12)

ts.plot(D1,
        type = "l",
        ylab = "Seasonal Differencing",
        col = "blue")

acf2 <- acf(D1,
            lag.max = 48,
            xaxt = "n",
            main = "ACF D1",
            col = "blue")

acf2$lag <- acf2$lag * 12

acf2.1 <- as.data.frame(cbind(acf2$acf,
                              acf2$lag))

acf2.2 <- acf2.1[which(acf2.1$V2 %% 12 == 0), ]

barplot(height = acf2.2$V1,
        names.arg = acf2.2$V2,
        ylab = "ACF",
        xlab = "Lag")

# =========================================================
# DIFFERENCING GABUNGAN
# =========================================================
d1D1 <- diff(D1)

ts.plot(d1D1,
        type = "l",
        ylab = "d1D1",
        col = "blue")

# =========================================================
# IDENTIFIKASI MODEL
# =========================================================
acf3 <- acf(d1D1,
            lag.max = 24,
            xaxt = "n",
            main = "ACF d1D1",
            col = "blue")

axis(1,
     at = 0:24/12,
     labels = 0:24)

pacf3 <- pacf(d1D1,
              lag.max = 24,
              xaxt = "n",
              main = "PACF d1D1",
              col = "blue")

axis(1,
     at = 0:24/12,
     labels = 0:24)

acf3$lag <- acf3$lag * 12

acf3.1 <- as.data.frame(cbind(acf3$acf,
                              acf3$lag))

acf3.2 <- acf3.1[which(acf3.1$V2 %% 12 == 0), ]

barplot(height = acf3.2$V1,
        names.arg = acf3.2$V2,
        ylab = "ACF",
        xlab = "Lag")

pacf3$lag <- pacf3$lag * 12

pacf3.1 <- as.data.frame(cbind(pacf3$acf,
                               pacf3$lag))

pacf3.2 <- pacf3.1[which(pacf3.1$V2 %% 12 == 0), ]

barplot(height = pacf3.2$V1,
        names.arg = pacf3.2$V2,
        ylab = "PACF",
        xlab = "Lag")

# =========================================================
# EACF
# =========================================================
eacf(d1D1)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o o  o  o  o 
## 1 x o x o o o o o o o o  o  o  o 
## 2 x x x o o o o o o o o  o  o  o 
## 3 x x x o o o o o o o o  o  o  o 
## 4 o o x o o o o o o o o  o  o  o 
## 5 x o x o o o o o o o o  o  o  o 
## 6 o o o o o o o o o o o  o  o  o 
## 7 x x x x o o o o o o o  o  o  o
# =========================================================
# PEMBENTUKAN MODEL ARIMA
# =========================================================

# Model ARIMA Non-Seasonal
tmodel1 <- Arima(train.ts,
                 order = c(0,1,1))

summary(tmodel1)
## Series: train.ts 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.4339
## s.e.  0.1134
## 
## sigma^2 = 593.2:  log likelihood = -493.05
## AIC=990.1   AICc=990.22   BIC=995.45
## 
## Training set error measures:
##                   ME     RMSE      MAE       MPE     MAPE      MASE        ACF1
## Training set 1.56673 24.12835 19.65591 0.4518527 8.520933 0.6429189 -0.05727306
# Model ARIMA Seasonal
tmodel2 <- Arima(train.ts,
                 order = c(0,1,1),
                 seasonal = list(order = c(0,1,1),
                                 period = 12))

summary(tmodel2)
## Series: train.ts 
## ARIMA(0,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ma1     sma1
##       -0.2097  -0.1232
## s.e.   0.1080   0.0974
## 
## sigma^2 = 93.39:  log likelihood = -349.4
## AIC=704.8   AICc=705.07   BIC=712.46
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.3681975 8.967841 6.707961 0.09346515 2.878977 0.2194086
##                      ACF1
## Training set -0.006128396
# =========================================================
# MODEL TERBAIK
# =========================================================
AICKandidatModel <- c(tmodel1$aic,
                      tmodel2$aic)

AICcKandidatModel <- c(tmodel1$aicc,
                       tmodel2$aicc)

BICKandidatModel <- c(tmodel1$bic,
                      tmodel2$bic)

KandidatModelARIMA <- c("ARIMA(0,1,1)",
                         "ARIMA(0,1,1)(0,1,1)12")

compmodelARIMA <- cbind(KandidatModelARIMA,
                        AICKandidatModel,
                        AICcKandidatModel,
                        BICKandidatModel)

colnames(compmodelARIMA) <- c("Kandidat Model",
                              "Nilai AIC",
                              "Nilai AICc",
                              "Nilai BIC")

compmodelARIMA <- as.data.frame(compmodelARIMA)

compmodelARIMA
##          Kandidat Model        Nilai AIC      Nilai AICc        Nilai BIC
## 1          ARIMA(0,1,1) 990.101617403295 990.21700201868 995.447275072219
## 2 ARIMA(0,1,1)(0,1,1)12 704.801449139834 705.06518540357 712.463079814636
# =========================================================
# DIBANDINGKAN DENGAN AUTO.ARIMA
# =========================================================
auto.arima(train.ts,
           seasonal = TRUE,
           stepwise = FALSE,
           approximation = FALSE)
## Series: train.ts 
## ARIMA(0,1,3)(0,1,0)[12] 
## 
## Coefficients:
##           ma1     ma2      ma3
##       -0.2753  0.0964  -0.3059
## s.e.   0.0994  0.1132   0.1103
## 
## sigma^2 = 88.95:  log likelihood = -346.64
## AIC=701.29   AICc=701.73   BIC=711.5
# =========================================================
# PENGUJIAN PARAMETER MODEL
# =========================================================
printstatarima <- function (x,
                            digits = 4,
                            se = TRUE){

  if(length(x$coef) > 0){

    cat("\nCoefficients:\n")

    coef <- round(x$coef,
                  digits = digits)

    if(se && nrow(x$var.coef)){

      ses <- rep(0, length(coef))

      ses[x$mask] <- round(sqrt(diag(x$var.coef)),
                           digits = digits)

      coef <- matrix(coef,
                     1,
                     dimnames = list(NULL,
                                     names(coef)))

      coef <- rbind(coef,
                    s.e. = ses)

      statt <- coef[1,] / ses

      pval <- 2 * pt(abs(statt),
                     df = length(x$residuals)-1,
                     lower.tail = FALSE)

      coef <- rbind(coef,
                    t = round(statt,
                              digits = digits),
                    sign. = round(pval,
                                  digits = digits))

      coef <- t(coef)
    }

    print.default(coef,
                  print.gap = 2)
  }
}

printstatarima(tmodel2)
## 
## Coefficients:
##                  s.e.        t   sign.
## ma1   -0.2097  0.1080  -1.9417  0.0548
## sma1  -0.1232  0.0974  -1.2649  0.2087
# =========================================================
# DIAGNOSTIC MODEL
# =========================================================
tsdisplay(residuals(tmodel2),
          lag.max = 45,
          main = 'ARIMA Seasonal Model Residuals',
          col = "blue")

Box.test(residuals(tmodel2),
         lag = 12,
         type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(tmodel2)
## X-squared = 9.6876, df = 12, p-value = 0.6433
Box.test(residuals(tmodel2),
         lag = 24,
         type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  residuals(tmodel2)
## X-squared = 37.012, df = 24, p-value = 0.04364
jarque.bera.test(residuals(tmodel2))
## 
##  Jarque Bera Test
## 
## data:  residuals(tmodel2)
## X-squared = 3.3041, df = 2, p-value = 0.1917
# =========================================================
# FORECASTING
# =========================================================
ramalan_arima2 <- forecast(tmodel2,
                           h = length(test.ts))

ramalan_arima2
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1958       346.0770 333.6921 358.4620 327.1359 365.0182
## Feb 1958       332.9001 317.1143 348.6860 308.7577 357.0425
## Mar 1958       385.9825 367.4083 404.5567 357.5758 414.3893
## Apr 1958       378.5491 357.5537 399.5445 346.4395 410.6588
## May 1958       385.2504 362.0855 408.4153 349.8227 420.6781
## Jun 1958       450.6965 425.5486 475.8445 412.2360 489.1570
## Jul 1958       493.3307 466.3451 520.3164 452.0597 534.6018
## Aug 1958       493.9807 465.2747 522.6867 450.0787 537.8827
## Sep 1958       432.8123 402.4834 463.1411 386.4283 479.1963
## Oct 1958       376.9813 345.1121 408.8505 328.2416 425.7210
## Nov 1958       335.8333 302.4949 369.1718 284.8466 386.8201
## Dec 1958       367.3882 332.6426 402.1338 314.2493 420.5271
## Jan 1959       377.2977 336.8805 417.7149 315.4850 439.1105
## Feb 1959       364.1208 319.7248 408.5168 296.2230 432.0186
## Mar 1959       417.2032 369.1568 465.2496 343.7225 490.6839
## Apr 1959       409.7698 358.3314 461.2083 331.1015 488.4382
## May 1959       416.4711 361.8509 471.0913 332.9367 500.0055
## Jun 1959       481.9172 424.2906 539.5438 393.7849 570.0495
## Jul 1959       524.5514 464.0677 585.0351 432.0496 617.0533
## Aug 1959       525.2014 461.9895 588.4132 428.5272 621.8755
## Sep 1959       464.0330 398.2060 529.8599 363.3593 564.7066
## Oct 1959       408.2020 339.8599 476.5441 303.6817 512.7222
## Nov 1959       367.0540 296.2861 437.8220 258.8238 475.2842
## Dec 1959       398.6089 325.4956 471.7222 286.7917 510.4260
## Jan 1960       408.5184 329.7792 487.2577 288.0971 528.9397
## Feb 1960       395.3415 312.1173 478.5657 268.0611 522.6219
## Mar 1960       448.4239 360.9444 535.9034 314.6355 582.2123
## Apr 1960       440.9905 349.4533 532.5278 300.9964 580.9847
## May 1960       447.6918 352.2692 543.1144 301.7555 593.6280
## Jun 1960       513.1379 413.9821 612.2937 361.4922 664.7836
## Jul 1960       555.7721 453.0187 658.5255 398.6243 712.9199
## Aug 1960       556.4220 450.1927 662.6514 393.9583 718.8858
## Sep 1960       495.2536 385.6586 604.8487 327.6425 662.8648
## Oct 1960       439.4227 326.5623 552.2831 266.8176 612.0277
## Nov 1960       398.2747 282.2408 514.3086 220.8162 575.7333
## Dec 1960       429.8296 310.7067 548.9525 247.6468 612.0123
prediksi <- forecast(tmodel2,
                     h = length(test.ts))

accuracy(prediksi,
         test.ts)
##                      ME      RMSE       MAE         MPE     MAPE      MASE
## Training set  0.3681975  8.967841  6.707961  0.09346515 2.878977 0.2194086
## Test set     -1.0358744 22.562641 17.988540 -1.00841076 4.178255 0.5883816
##                      ACF1 Theil's U
## Training set -0.006128396        NA
## Test set      0.622144156 0.4562945
plot(ramalan_arima2,
     col = "blue")

forecast_arima2 <- cbind(ramalan_arima2$mean,
                         ramalan_arima2$lower,
                         ramalan_arima2$upper)
forecast_arima2
##          ramalan_arima2$mean ramalan_arima2$lower.80% ramalan_arima2$lower.95%
## Jan 1958            346.0770                 333.6921                 327.1359
## Feb 1958            332.9001                 317.1143                 308.7577
## Mar 1958            385.9825                 367.4083                 357.5758
## Apr 1958            378.5491                 357.5537                 346.4395
## May 1958            385.2504                 362.0855                 349.8227
## Jun 1958            450.6965                 425.5486                 412.2360
## Jul 1958            493.3307                 466.3451                 452.0597
## Aug 1958            493.9807                 465.2747                 450.0787
## Sep 1958            432.8123                 402.4834                 386.4283
## Oct 1958            376.9813                 345.1121                 328.2416
## Nov 1958            335.8333                 302.4949                 284.8466
## Dec 1958            367.3882                 332.6426                 314.2493
## Jan 1959            377.2977                 336.8805                 315.4850
## Feb 1959            364.1208                 319.7248                 296.2230
## Mar 1959            417.2032                 369.1568                 343.7225
## Apr 1959            409.7698                 358.3314                 331.1015
## May 1959            416.4711                 361.8509                 332.9367
## Jun 1959            481.9172                 424.2906                 393.7849
## Jul 1959            524.5514                 464.0677                 432.0496
## Aug 1959            525.2014                 461.9895                 428.5272
## Sep 1959            464.0330                 398.2060                 363.3593
## Oct 1959            408.2020                 339.8599                 303.6817
## Nov 1959            367.0540                 296.2861                 258.8238
## Dec 1959            398.6089                 325.4956                 286.7917
## Jan 1960            408.5184                 329.7792                 288.0971
## Feb 1960            395.3415                 312.1173                 268.0611
## Mar 1960            448.4239                 360.9444                 314.6355
## Apr 1960            440.9905                 349.4533                 300.9964
## May 1960            447.6918                 352.2692                 301.7555
## Jun 1960            513.1379                 413.9821                 361.4922
## Jul 1960            555.7721                 453.0187                 398.6243
## Aug 1960            556.4220                 450.1927                 393.9583
## Sep 1960            495.2536                 385.6586                 327.6425
## Oct 1960            439.4227                 326.5623                 266.8176
## Nov 1960            398.2747                 282.2408                 220.8162
## Dec 1960            429.8296                 310.7067                 247.6468
##          ramalan_arima2$upper.80% ramalan_arima2$upper.95%
## Jan 1958                 358.4620                 365.0182
## Feb 1958                 348.6860                 357.0425
## Mar 1958                 404.5567                 414.3893
## Apr 1958                 399.5445                 410.6588
## May 1958                 408.4153                 420.6781
## Jun 1958                 475.8445                 489.1570
## Jul 1958                 520.3164                 534.6018
## Aug 1958                 522.6867                 537.8827
## Sep 1958                 463.1411                 479.1963
## Oct 1958                 408.8505                 425.7210
## Nov 1958                 369.1718                 386.8201
## Dec 1958                 402.1338                 420.5271
## Jan 1959                 417.7149                 439.1105
## Feb 1959                 408.5168                 432.0186
## Mar 1959                 465.2496                 490.6839
## Apr 1959                 461.2083                 488.4382
## May 1959                 471.0913                 500.0055
## Jun 1959                 539.5438                 570.0495
## Jul 1959                 585.0351                 617.0533
## Aug 1959                 588.4132                 621.8755
## Sep 1959                 529.8599                 564.7066
## Oct 1959                 476.5441                 512.7222
## Nov 1959                 437.8220                 475.2842
## Dec 1959                 471.7222                 510.4260
## Jan 1960                 487.2577                 528.9397
## Feb 1960                 478.5657                 522.6219
## Mar 1960                 535.9034                 582.2123
## Apr 1960                 532.5278                 580.9847
## May 1960                 543.1144                 593.6280
## Jun 1960                 612.2937                 664.7836
## Jul 1960                 658.5255                 712.9199
## Aug 1960                 662.6514                 718.8858
## Sep 1960                 604.8487                 662.8648
## Oct 1960                 552.2831                 612.0277
## Nov 1960                 514.3086                 575.7333
## Dec 1960                 548.9525                 612.0123