knitr::opts_chunk$set(echo = TRUE)
# KODE R ANALISIS TIME SERIES SESUAI DATA EXCEL
# 1. LOAD PACKAGE
library(readxl)
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(TTR)
# 2. IMPORT DATA EXCEL
# Ganti lokasi file sesuai folder kalian
data <- read_excel("~/data tds.xlsx")
# Melihat data awal
head(data)
## # A tibble: 6 × 5
## Tanggal `Rata-rata Suhu (°C)` `Kelembapan (%)` `Kecepatan Angin (km/h)`
## <chr> <dbl> <dbl> <dbl>
## 1 2013-01-01 10 84.5 0
## 2 2013-01-02 7.4 92 2.98
## 3 2013-01-03 7.17 87 4.63
## 4 2013-01-04 8.67 71.3 1.23
## 5 2013-01-05 6 86.8 3.7
## 6 2013-01-06 7 82.8 1.48
## # ℹ 1 more variable: `Tekanan Udara (hPa)` <dbl>
# 3. STRUKTUR DATA
str(data)
## tibble [1,462 × 5] (S3: tbl_df/tbl/data.frame)
## $ Tanggal : chr [1:1462] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" ...
## $ Rata-rata Suhu (°C) : num [1:1462] 10 7.4 7.17 8.67 6 ...
## $ Kelembapan (%) : num [1:1462] 84.5 92 87 71.3 86.8 ...
## $ Kecepatan Angin (km/h): num [1:1462] 0 2.98 4.63 1.23 3.7 ...
## $ Tekanan Udara (hPa) : num [1:1462] 1016 1018 1019 1017 1016 ...
summary(data)
## Tanggal Rata-rata Suhu (°C) Kelembapan (%) Kecepatan Angin (km/h)
## Length :1462 Min. : 6.00 Min. : 13.43 Min. : 0.000
## N.unique :1462 1st Qu.:18.86 1st Qu.: 50.38 1st Qu.: 3.475
## N.blank : 0 Median :27.71 Median : 62.62 Median : 6.222
## Min.nchar: 10 Mean :25.50 Mean : 60.77 Mean : 6.802
## Max.nchar: 10 3rd Qu.:31.31 3rd Qu.: 72.22 3rd Qu.: 9.238
## Max. :38.71 Max. :100.00 Max. :42.220
## Tekanan Udara (hPa)
## Min. : -3.042
## 1st Qu.:1001.580
## Median :1008.563
## Mean :1011.105
## 3rd Qu.:1014.945
## Max. :7679.333
# 4. DATA CLEANING
# Mengubah tanggal menjadi format Date
data$Tanggal <- as.Date(data$Tanggal)
# Mengecek missing value
colSums(is.na(data))
## Tanggal Rata-rata Suhu (°C) Kelembapan (%)
## 0 0 0
## Kecepatan Angin (km/h) Tekanan Udara (hPa)
## 0 0
# Mengurutkan berdasarkan tanggal
data <- data[order(data$Tanggal), ]
# 5. MEMILIH VARIABEL TIME SERIES
# Variabel yang akan dianalisis
# Contoh: suhu rata-rata
suhu <- ts(data$`Rata-rata Suhu (°C)`, frequency = 365)
# 6. VISUALISASI DATA
plot(
suhu,
main = "Time Series Rata-rata Suhu",
xlab = "Waktu",
ylab = "Suhu (°C)",
col = "blue"
)

# 9. DIFFERENCING
diff_suhu <- diff(suhu)
plot(
diff_suhu,
main = "Differencing Orde 1",
ylab = "Differencing",
col = "red"
)

adf_diff <- adf.test(diff_suhu)
## Warning in adf.test(diff_suhu): p-value smaller than printed p-value
adf_diff
##
## Augmented Dickey-Fuller Test
##
## data: diff_suhu
## Dickey-Fuller = -14.011, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
# 10. PLOT ACF DAN PACF
acf(
diff_suhu,
main = "Plot ACF"
)

pacf(
diff_suhu,
main = "Plot PACF"
)

# 11. MEMBAGI DATA TRAINING DAN TESTING
n <- length(suhu)
train_size <- floor(0.8 * n)
train <- suhu[1:train_size]
test <- suhu[(train_size + 1):n]
length(train)
## [1] 1169
length(test)
## [1] 293
# 12. PEMODELAN ARIMA
model_arima <- auto.arima(
train,
seasonal = FALSE
)
summary(model_arima)
## Series: train
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.9853 -0.2080 23.9566
## s.e. 0.0051 0.0347 2.4384
##
## sigma^2 = 2.67: log likelihood = -2232.86
## AIC=4473.72 AICc=4473.75 BIC=4493.97
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02237009 1.631968 1.23604 -0.469404 5.570551 1.001446 0.02359986
# 13. FORECASTING ARIMA
forecast_arima <- forecast(
model_arima,
h = length(test)
)
plot(
forecast_arima,
main = "Forecasting ARIMA Suhu"
)

# 14. EVALUASI MODEL ARIMA
akurasi_arima <- accuracy(
forecast_arima,
test
)
akurasi_arima
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02237009 1.631968 1.236040 -0.469404 5.570551 1.001446
## Test set 5.54239066 7.996391 7.191793 14.641808 24.451254 5.826829
## ACF1
## Training set 0.02359986
## Test set NA
# 15. PEMODELAN SARIMA
model_sarima <- auto.arima(
train,
seasonal = TRUE
)
summary(model_sarima)
## Series: train
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.9853 -0.2080 23.9566
## s.e. 0.0051 0.0347 2.4384
##
## sigma^2 = 2.67: log likelihood = -2232.86
## AIC=4473.72 AICc=4473.75 BIC=4493.97
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02237009 1.631968 1.23604 -0.469404 5.570551 1.001446 0.02359986
# 16. FORECASTING SARIMA
forecast_sarima <- forecast(
model_sarima,
h = length(test)
)
plot(
forecast_sarima,
main = "Forecasting SARIMA Suhu"
)

# 17. EVALUASI MODEL SARIMA
akurasi_sarima <- accuracy(
forecast_sarima,
test
)
akurasi_sarima
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02237009 1.631968 1.236040 -0.469404 5.570551 1.001446
## Test set 5.54239066 7.996391 7.191793 14.641808 24.451254 5.826829
## ACF1
## Training set 0.02359986
## Test set NA
# 18. PERBANDINGAN MODEL
perbandingan <- data.frame(
Model = c("ARIMA", "SARIMA"),
RMSE = c(akurasi_arima[2, "RMSE"], akurasi_sarima[2, "RMSE"]),
MAE = c(akurasi_arima[2, "MAE"], akurasi_sarima[2, "MAE"]),
MAPE = c(akurasi_arima[2, "MAPE"], akurasi_sarima[2, "MAPE"])
)
perbandingan
## Model RMSE MAE MAPE
## 1 ARIMA 7.996391 7.191793 24.45125
## 2 SARIMA 7.996391 7.191793 24.45125
# 19. MEMILIH MODEL TERBAIK
# Model terbaik adalah model dengan nilai
# RMSE, MAE, dan MAPE paling kecil.
# 20. FORECAST 30 HARI KE DEPAN
forecast_30hari <- forecast(
model_sarima,
h = 30
)
plot(
forecast_30hari,
main = "Forecast 30 Hari Kedepan"
)

forecast_30hari
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1170 22.80516 20.71102 24.89930 19.60245 26.00787
## 1171 22.82209 20.16975 25.47443 18.76569 26.87849
## 1172 22.83877 19.73925 25.93829 18.09846 27.57908
## 1173 22.85520 19.37611 26.33430 17.53439 28.17602
## 1174 22.87140 19.05980 26.68299 17.04206 28.70073
## 1175 22.88735 18.77862 26.99609 16.60358 29.17112
## 1176 22.90307 18.52512 27.28102 16.20758 29.59857
## 1177 22.91856 18.29422 27.54289 15.84625 29.99087
## 1178 22.93382 18.08225 27.78540 15.51398 30.35366
## 1179 22.94886 17.88642 28.01129 15.20653 30.69118
## 1180 22.96367 17.70462 28.22273 14.92064 31.00670
## 1181 22.97827 17.53512 28.42142 14.65369 31.30285
## 1182 22.99265 17.37656 28.60875 14.40357 31.58173
## 1183 23.00683 17.22778 28.78588 14.16853 31.84512
## 1184 23.02079 17.08782 28.95375 13.94710 32.09448
## 1185 23.03455 16.95588 29.11321 13.73804 32.33106
## 1186 23.04810 16.83126 29.26494 13.54027 32.55594
## 1187 23.06146 16.71335 29.40957 13.35287 32.77005
## 1188 23.07462 16.60162 29.54762 13.17502 32.97422
## 1189 23.08759 16.49560 29.67957 13.00602 33.16915
## 1190 23.10036 16.39489 29.80583 12.84523 33.35549
## 1191 23.11295 16.29911 29.92679 12.69208 33.53382
## 1192 23.12535 16.20793 30.04278 12.54607 33.70464
## 1193 23.13757 16.12106 30.15409 12.40674 33.86841
## 1194 23.14961 16.03821 30.26102 12.27367 34.02556
## 1195 23.16148 15.95916 30.36380 12.14648 34.17647
## 1196 23.17317 15.88367 30.46267 12.02484 34.32149
## 1197 23.18469 15.81154 30.55783 11.90843 34.46094
## 1198 23.19603 15.74258 30.64949 11.79696 34.59511
## 1199 23.20722 15.67662 30.73782 11.69016 34.72428