Model ARIMA - Analisis Suhu

Load Library

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(forecast)
library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

#membaca data

data_suhu <- read_excel("~/Suhu Rata rata.xlsx")

# Cek nama kolom
names(data_suhu)

## [1] "Tanggal"        "Suhu rata rata"

# Rename biar aman
colnames(data_suhu) <- c("Tanggal", "Suhu")

head(data_suhu)

## # A tibble: 6 × 2
##   Tanggal     Suhu
##   <chr>      <dbl>
## 1 2013-01-01 10   
## 2 2013-01-02  7.4 
## 3 2013-01-03  7.17
## 4 2013-01-04  8.67
## 5 2013-01-05  6   
## 6 2013-01-06  7

#cleaning data

# Ubah nilai tidak valid jadi NA
data_suhu$Suhu[data_suhu$Suhu %in% c(8888, 9999, "")] <- NA

# Pastikan numerik
data_suhu$Suhu <- as.numeric(data_suhu$Suhu)

# Cek NA
sum(is.na(data_suhu$Suhu))

## [1] 0

#interpolasi misisng value

data_suhu$Suhu <- na.approx(data_suhu$Suhu)
data_suhu$Tanggal <- as.Date(data_suhu$Tanggal)
data_suhu$Minggu <- floor_date(data_suhu$Tanggal, unit = "week", week_start = 1)

data_mingguan <- data_suhu %>%
  group_by(Minggu) %>%
  summarise(
    Suhu_Mingguan = round(mean(Suhu, na.rm = TRUE), 2)
  )

head(data_mingguan)

## # A tibble: 6 × 2
##   Minggu     Suhu_Mingguan
##   <date>             <dbl>
## 1 2012-12-31          7.71
## 2 2013-01-07         12.3 
## 3 2013-01-14         13.6 
## 4 2013-01-21         12.3 
## 5 2013-01-28         15.7 
## 6 2013-02-04         15.9

#time series dan split data

ts_suhu <- ts(data_mingguan$Suhu_Mingguan, frequency = 52)

n <- length(ts_suhu)
indeks_split <- floor(0.8 * n)

train.Suhu.ts <- ts_suhu[1:indeks_split]
test.Suhu.ts <- ts_suhu[(indeks_split+1):n]
ts_suhu <- ts(data_mingguan$Suhu_Mingguan, frequency = 52)

plot(ts_suhu, main="Time Series Suhu Mingguan", col="blue")

n <- length(ts_suhu)
indeks_split <- floor(0.8 * n)

train.Suhu.ts <- ts_suhu[1:indeks_split]
test.Suhu.ts <- ts_suhu[(indeks_split+1):n]

#Uji Stasioneritas

adf.test(train.Suhu.ts)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  train.Suhu.ts
## Dickey-Fuller = -3.6114, Lag order = 5, p-value = 0.03431
## alternative hypothesis: stationary

#Differencing

ts_diff <- diff(train.Suhu.ts)

plot(ts_diff, main="Differencing")

adf.test(ts_diff)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_diff
## Dickey-Fuller = -2.5831, Lag order = 5, p-value = 0.3329
## alternative hypothesis: stationary

#ACF & PACF

acf(ts_diff)

pacf(ts_diff)

# Load package

library(TSA)

## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast

## 
## Attaching package: 'TSA'

## The following objects are masked from 'package:stats':
## 
##     acf, arima

## The following object is masked from 'package:utils':
## 
##     tar

# Differencing orde 1
# EACF
eacf(ts_diff)

## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o x o o o o o o  o  o  o 
## 1 x o o o x o o o o o o  o  o  o 
## 2 x o o o x o o o o o o  o  o  o 
## 3 x o o o x o o o o o o  o  o  o 
## 4 x o o x x o o o o o o  o  o  o 
## 5 x x o o o o o o o o o  o  o  o 
## 6 o x o o o o o o o o o  o  o  o 
## 7 o o o o o o o o o o o  o  o  o

# Cek struktur data
head(ts_diff)

## [1]  4.63  1.30 -1.31  3.41  0.12  0.15

# EACF untuk identifikasi model
eacf(ts_diff)

## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o x o o o o o o  o  o  o 
## 1 x o o o x o o o o o o  o  o  o 
## 2 x o o o x o o o o o o  o  o  o 
## 3 x o o o x o o o o o o  o  o  o 
## 4 x o o x x o o o o o o  o  o  o 
## 5 x x o o o o o o o o o  o  o  o 
## 6 o x o o o o o o o o o  o  o  o 
## 7 o o o o o o o o o o o  o  o  o

#model Arima

data_ts <- ts_diff
model1 <- Arima(data_ts, order = c(5, 0, 2))
model2 <- Arima(data_ts, order = c(0, 0, 5))  
model3 <- Arima(data_ts, order = c(5, 0, 0))
model4 <- Arima(data_ts, order = c(5, 0, 1))

# Membuat tabel penampung secara langsung tanpa membuat fungsi terpisah
tabel_kandidat <- data.frame(
  Model = c("ARIMA(5,0,2)", "ARIMA(0,0,5)", "ARIMA(5,0,0)", "ARIMA(5,0,1)"),
  AIC   = round(c(model1$aic,  model2$aic,  model3$aic,  model4$aic), 2),
  AICc  = round(c(model1$aicc, model2$aicc, model3$aicc, model4$aicc), 2),
  BIC   = round(c(model1$bic,  model2$bic,  model3$bic,  model4$bic), 2)
)

# Memaksa R untuk langsung memunculkan tabel ke layar
tabel_kandidat

##          Model    AIC   AICc    BIC
## 1 ARIMA(5,0,2) 719.49 720.65 747.50
## 2 ARIMA(0,0,5) 719.81 720.52 741.59
## 3 ARIMA(5,0,0) 718.96 719.67 740.74
## 4 ARIMA(5,0,1) 718.79 719.71 743.69

# 1. LOAD LIBRARY YANG DIBUTUHKAN
# Jika belum menginstal lmtest, jalankan: install.packages("lmtest")
library(forecast)
library(lmtest)

# ESTIMASI PARAMETER MODEL ARIMA(5,0,1)
# Menggunakan objek data_ts (data stasioner non-musiman Anda)
# Orde yang dimasukkan adalah c(p=5, d=0, q=1)
model_arima_terbaik <- Arima(data_ts,
                       order = c(5,0,1))

summary(model_arima_terbaik)

## Series: data_ts 
## ARIMA(5,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2     ar3     ar4     ar5      ma1    mean
##       0.1705  0.0896  0.0796  0.1230  0.2446  -0.3161  0.2252
## s.e.  0.1840  0.0778  0.0782  0.0807  0.0860   0.1812  0.3552
## 
## sigma^2 = 4.2:  log likelihood = -351.4
## AIC=718.79   AICc=719.71   BIC=743.69
## 
## Training set error measures:
##                       ME     RMSE      MAE      MPE     MAPE      MASE
## Training set -0.03423561 2.005629 1.630906 48.13774 160.9516 0.6716908
##                      ACF1
## Training set -0.006330614

coeftest(model_arima_terbaik)

## 
## z test of coefficients:
## 
##            Estimate Std. Error z value Pr(>|z|)   
## ar1        0.170527   0.183980  0.9269 0.353992   
## ar2        0.089557   0.077762  1.1517 0.249452   
## ar3        0.079564   0.078241  1.0169 0.309198   
## ar4        0.122980   0.080675  1.5244 0.127413   
## ar5        0.244616   0.086006  2.8442 0.004453 **
## ma1       -0.316059   0.181191 -1.7443 0.081100 . 
## intercept  0.225210   0.355154  0.6341 0.526002   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# 3. MENAMPILKAN TABEL HASIL ESTIMASI DAN UJI SIGNIFIKANSI (P-VALUE)
print(" HASIL ESTIMASI PARAMETER MODEL ARIMA(5,0,1) ")

## [1] " HASIL ESTIMASI PARAMETER MODEL ARIMA(5,0,1) "

# Fungsi ini memunculkan nilai p-value secara langsung untuk uji signifikansi
coeftest(model_arima_terbaik)

## 
## z test of coefficients:
## 
##            Estimate Std. Error z value Pr(>|z|)   
## ar1        0.170527   0.183980  0.9269 0.353992   
## ar2        0.089557   0.077762  1.1517 0.249452   
## ar3        0.079564   0.078241  1.0169 0.309198   
## ar4        0.122980   0.080675  1.5244 0.127413   
## ar5        0.244616   0.086006  2.8442 0.004453 **
## ma1       -0.316059   0.181191 -1.7443 0.081100 . 
## intercept  0.225210   0.355154  0.6341 0.526002   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#diasnostic checking

print(ls())

##  [1] "data_mingguan"       "data_suhu"           "data_ts"            
##  [4] "indeks_split"        "model_arima_terbaik" "model1"             
##  [7] "model2"              "model3"              "model4"             
## [10] "n"                   "tabel_kandidat"      "test.Suhu.ts"       
## [13] "train.Suhu.ts"       "ts_diff"             "ts_suhu"

checkresiduals(model_arima_terbaik)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,0,1) with non-zero mean
## Q* = 1.4028, df = 4, p-value = 0.8437
## 
## Model df: 6.   Total lags used: 10

sisaan <- residuals(model_arima_terbaik)

ks.test(sisaan, "pnorm", mean = mean(sisaan), sd = sd(sisaan))

## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  sisaan
## D = 0.045334, p-value = 0.8846
## alternative hypothesis: two-sided

t.test(sisaan, mu = 0)

## 
##  One Sample t-test
## 
## data:  sisaan
## t = -0.2193, df = 165, p-value = 0.8267
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.3424766  0.2740054
## sample estimates:
##   mean of x 
## -0.03423561

Box.test(sisaan, lag = 23, type = "Ljung")

## 
##  Box-Ljung test
## 
## data:  sisaan
## X-squared = 16.747, df = 23, p-value = 0.8215

#forecasting

h_step <- length(test.Suhu.ts)

ramalan_suhu <- forecast(model_arima_terbaik, h = h_step)

plot(ramalan_suhu, main = "Peramalan Suhu Mingguan")

lines(test.Suhu.ts, col = "red", type = "b", pch = 20)

legend("topleft",
       legend=c("Ramalan", "Data Aktual"),
       col=c("blue", "red"),
       lty=1)

#evaluasi akurasi

matriks_evaluasi <- accuracy(ramalan_suhu, test.Suhu.ts)

tabel_akurasi <- data.frame(
  Dataset = c("Training", "Testing"),
  RMSE = c(matriks_evaluasi[1, "RMSE"], matriks_evaluasi[2, "RMSE"]),
  MAPE = c(matriks_evaluasi[1, "MAPE"], matriks_evaluasi[2, "MAPE"])
)
tabel_akurasi$RMSE <- round(tabel_akurasi$RMSE, 2)
tabel_akurasi$MAPE <- round(tabel_akurasi$MAPE, 2)

print(tabel_akurasi)

##    Dataset  RMSE   MAPE
## 1 Training  2.01 160.95
## 2  Testing 29.22  98.34