knitr::opts_chunk$set(echo = TRUE)
# KODE R ANALISIS TIME SERIES SESUAI DATA EXCEL

# 1. LOAD PACKAGE
library(readxl)
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)
library(TTR)

# 2. IMPORT DATA EXCEL

# Ganti lokasi file sesuai folder kalian

data <- read_excel("~/data tds.xlsx")

# Melihat data awal
head(data)
## # A tibble: 6 × 5
##   Tanggal    `Rata-rata Suhu (°C)` `Kelembapan (%)` `Kecepatan Angin (km/h)`
##   <chr>                      <dbl>            <dbl>                    <dbl>
## 1 2013-01-01                 10                84.5                     0   
## 2 2013-01-02                  7.4              92                       2.98
## 3 2013-01-03                  7.17             87                       4.63
## 4 2013-01-04                  8.67             71.3                     1.23
## 5 2013-01-05                  6                86.8                     3.7 
## 6 2013-01-06                  7                82.8                     1.48
## # ℹ 1 more variable: `Tekanan Udara (hPa)` <dbl>
# 3. STRUKTUR DATA

str(data)
## tibble [1,462 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Tanggal               : chr [1:1462] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" ...
##  $ Rata-rata Suhu (°C)   : num [1:1462] 10 7.4 7.17 8.67 6 ...
##  $ Kelembapan (%)        : num [1:1462] 84.5 92 87 71.3 86.8 ...
##  $ Kecepatan Angin (km/h): num [1:1462] 0 2.98 4.63 1.23 3.7 ...
##  $ Tekanan Udara (hPa)   : num [1:1462] 1016 1018 1019 1017 1016 ...
summary(data)
##       Tanggal     Rata-rata Suhu (°C) Kelembapan (%)   Kecepatan Angin (km/h)
##  Length   :1462   Min.   : 6.00       Min.   : 13.43   Min.   : 0.000        
##  N.unique :1462   1st Qu.:18.86       1st Qu.: 50.38   1st Qu.: 3.475        
##  N.blank  :   0   Median :27.71       Median : 62.62   Median : 6.222        
##  Min.nchar:  10   Mean   :25.50       Mean   : 60.77   Mean   : 6.802        
##  Max.nchar:  10   3rd Qu.:31.31       3rd Qu.: 72.22   3rd Qu.: 9.238        
##                   Max.   :38.71       Max.   :100.00   Max.   :42.220        
##  Tekanan Udara (hPa)
##  Min.   :  -3.042   
##  1st Qu.:1001.580   
##  Median :1008.563   
##  Mean   :1011.105   
##  3rd Qu.:1014.945   
##  Max.   :7679.333
# 4. DATA CLEANING

# Mengubah tanggal menjadi format Date

data$Tanggal <- as.Date(data$Tanggal)

# Mengecek missing value
colSums(is.na(data))
##                Tanggal    Rata-rata Suhu (°C)         Kelembapan (%) 
##                      0                      0                      0 
## Kecepatan Angin (km/h)    Tekanan Udara (hPa) 
##                      0                      0
# Mengurutkan berdasarkan tanggal

data <- data[order(data$Tanggal), ]

# 5. MEMILIH VARIABEL TIME SERIES
# Variabel yang akan dianalisis
# Contoh: suhu rata-rata

suhu <- ts(data$`Rata-rata Suhu (°C)`, frequency = 365)

# 6. VISUALISASI DATA
plot(
  suhu,
  main = "Time Series Rata-rata Suhu",
  xlab = "Waktu",
  ylab = "Suhu (°C)",
  col = "blue"
)

# 9. DIFFERENCING

diff_suhu <- diff(suhu)

plot(
  diff_suhu,
  main = "Differencing Orde 1",
  ylab = "Differencing",
  col = "red"
)

adf_diff <- adf.test(diff_suhu)
## Warning in adf.test(diff_suhu): p-value smaller than printed p-value
adf_diff
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_suhu
## Dickey-Fuller = -14.011, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
# 10. PLOT ACF DAN PACF

acf(
  diff_suhu,
  main = "Plot ACF"
)

pacf(
  diff_suhu,
  main = "Plot PACF"
)

# 11. MEMBAGI DATA TRAINING DAN TESTING
n <- length(suhu)

train_size <- floor(0.8 * n)

train <- suhu[1:train_size]

test <- suhu[(train_size + 1):n]

length(train)
## [1] 1169
length(test)
## [1] 293
# 12. PEMODELAN ARIMA
model_arima <- auto.arima(
  train,
  seasonal = FALSE
)

summary(model_arima)
## Series: train 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ma1     mean
##       0.9853  -0.2080  23.9566
## s.e.  0.0051   0.0347   2.4384
## 
## sigma^2 = 2.67:  log likelihood = -2232.86
## AIC=4473.72   AICc=4473.75   BIC=4493.97
## 
## Training set error measures:
##                      ME     RMSE     MAE       MPE     MAPE     MASE       ACF1
## Training set 0.02237009 1.631968 1.23604 -0.469404 5.570551 1.001446 0.02359986
# 13. FORECASTING ARIMA

forecast_arima <- forecast(
  model_arima,
  h = length(test)
)

plot(
  forecast_arima,
  main = "Forecasting ARIMA Suhu"
)

# 14. EVALUASI MODEL ARIMA

akurasi_arima <- accuracy(
  forecast_arima,
  test
)

akurasi_arima
##                      ME     RMSE      MAE       MPE      MAPE     MASE
## Training set 0.02237009 1.631968 1.236040 -0.469404  5.570551 1.001446
## Test set     5.54239066 7.996391 7.191793 14.641808 24.451254 5.826829
##                    ACF1
## Training set 0.02359986
## Test set             NA
# 15. PEMODELAN SARIMA

model_sarima <- auto.arima(
  train,
  seasonal = TRUE
)

summary(model_sarima)
## Series: train 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ma1     mean
##       0.9853  -0.2080  23.9566
## s.e.  0.0051   0.0347   2.4384
## 
## sigma^2 = 2.67:  log likelihood = -2232.86
## AIC=4473.72   AICc=4473.75   BIC=4493.97
## 
## Training set error measures:
##                      ME     RMSE     MAE       MPE     MAPE     MASE       ACF1
## Training set 0.02237009 1.631968 1.23604 -0.469404 5.570551 1.001446 0.02359986
# 16. FORECASTING SARIMA

forecast_sarima <- forecast(
  model_sarima,
  h = length(test)
)

plot(
  forecast_sarima,
  main = "Forecasting SARIMA Suhu"
)

# 17. EVALUASI MODEL SARIMA

akurasi_sarima <- accuracy(
  forecast_sarima,
  test
)

akurasi_sarima
##                      ME     RMSE      MAE       MPE      MAPE     MASE
## Training set 0.02237009 1.631968 1.236040 -0.469404  5.570551 1.001446
## Test set     5.54239066 7.996391 7.191793 14.641808 24.451254 5.826829
##                    ACF1
## Training set 0.02359986
## Test set             NA
# 18. PERBANDINGAN MODEL

perbandingan <- data.frame(
  Model = c("ARIMA", "SARIMA"),
  RMSE = c(akurasi_arima[2, "RMSE"], akurasi_sarima[2, "RMSE"]),
  MAE = c(akurasi_arima[2, "MAE"], akurasi_sarima[2, "MAE"]),
  MAPE = c(akurasi_arima[2, "MAPE"], akurasi_sarima[2, "MAPE"])
)

perbandingan
##    Model     RMSE      MAE     MAPE
## 1  ARIMA 7.996391 7.191793 24.45125
## 2 SARIMA 7.996391 7.191793 24.45125
# 19. MEMILIH MODEL TERBAIK

# Model terbaik adalah model dengan nilai
# RMSE, MAE, dan MAPE paling kecil.


# 20. FORECAST 30 HARI KE DEPAN

forecast_30hari <- forecast(
  model_sarima,
  h = 30
)

plot(
  forecast_30hari,
  main = "Forecast 30 Hari Kedepan"
)

forecast_30hari
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 1170       22.80516 20.71102 24.89930 19.60245 26.00787
## 1171       22.82209 20.16975 25.47443 18.76569 26.87849
## 1172       22.83877 19.73925 25.93829 18.09846 27.57908
## 1173       22.85520 19.37611 26.33430 17.53439 28.17602
## 1174       22.87140 19.05980 26.68299 17.04206 28.70073
## 1175       22.88735 18.77862 26.99609 16.60358 29.17112
## 1176       22.90307 18.52512 27.28102 16.20758 29.59857
## 1177       22.91856 18.29422 27.54289 15.84625 29.99087
## 1178       22.93382 18.08225 27.78540 15.51398 30.35366
## 1179       22.94886 17.88642 28.01129 15.20653 30.69118
## 1180       22.96367 17.70462 28.22273 14.92064 31.00670
## 1181       22.97827 17.53512 28.42142 14.65369 31.30285
## 1182       22.99265 17.37656 28.60875 14.40357 31.58173
## 1183       23.00683 17.22778 28.78588 14.16853 31.84512
## 1184       23.02079 17.08782 28.95375 13.94710 32.09448
## 1185       23.03455 16.95588 29.11321 13.73804 32.33106
## 1186       23.04810 16.83126 29.26494 13.54027 32.55594
## 1187       23.06146 16.71335 29.40957 13.35287 32.77005
## 1188       23.07462 16.60162 29.54762 13.17502 32.97422
## 1189       23.08759 16.49560 29.67957 13.00602 33.16915
## 1190       23.10036 16.39489 29.80583 12.84523 33.35549
## 1191       23.11295 16.29911 29.92679 12.69208 33.53382
## 1192       23.12535 16.20793 30.04278 12.54607 33.70464
## 1193       23.13757 16.12106 30.15409 12.40674 33.86841
## 1194       23.14961 16.03821 30.26102 12.27367 34.02556
## 1195       23.16148 15.95916 30.36380 12.14648 34.17647
## 1196       23.17317 15.88367 30.46267 12.02484 34.32149
## 1197       23.18469 15.81154 30.55783 11.90843 34.46094
## 1198       23.19603 15.74258 30.64949 11.79696 34.59511
## 1199       23.20722 15.67662 30.73782 11.69016 34.72428