# ===========================================
# 1️⃣ Load Library
# ===========================================
library(readxl)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TTR)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tseries)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# ===========================================
# 2️⃣ Baca Data
# ===========================================

data <- read_excel("C:/Users/User/Downloads/data arw ekspor.xlsx")

# Ubah nama kolom agar aman
colnames(data) <- c("Tahun", "Bulan", "Ekspor")

# Pastikan kolom Ekspor numeric
data$Ekspor <- gsub("\\.", "", data$Ekspor)
data$Ekspor <- gsub(",", ".", data$Ekspor)
data$Ekspor <- as.numeric(data$Ekspor)

# Buat kolom tanggal
data$Tanggal <- seq(as.Date("2020-01-01"), by = "month", length.out = nrow(data))
data$Tahun <- format(data$Tanggal, "%Y")
data$Bulan <- format(data$Tanggal, "%b")
data$Bulan <- factor(data$Bulan, levels = month.abb)
# ===========================================
# 3️⃣ Cek Struktur Data
# ===========================================
str(data)
## tibble [60 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Tahun  : chr [1:60] "2020" "2020" "2020" "2020" ...
##  $ Bulan  : Factor w/ 12 levels "Jan","Feb","Mar",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Ekspor : num [1:60] 1096 1058 1177 991 848 ...
##  $ Tanggal: Date[1:60], format: "2020-01-01" "2020-02-01" ...
summary(data)
##     Tahun               Bulan        Ekspor          Tanggal          
##  Length:60          Jan    : 5   Min.   : 793.4   Min.   :2020-01-01  
##  Class :character   Feb    : 5   1st Qu.:1374.9   1st Qu.:2021-03-24  
##  Mode  :character   Mar    : 5   Median :1908.2   Median :2022-06-16  
##                     Apr    : 5   Mean   :1898.3   Mean   :2022-06-16  
##                     May    : 5   3rd Qu.:2237.2   3rd Qu.:2023-09-08  
##                     Jun    : 5   Max.   :3404.5   Max.   :2024-12-01  
##                     (Other):30
colSums(is.na(data))
##   Tahun   Bulan  Ekspor Tanggal 
##       0       0       0       0
#Pemisahan Data Training (2020–2023) & Testing (2024)
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_train
## # A tibble: 48 × 4
##    Tahun Bulan Ekspor Tanggal   
##    <chr> <fct>  <dbl> <date>    
##  1 2020  Jan    1096. 2020-01-01
##  2 2020  Feb    1058  2020-02-01
##  3 2020  Mar    1177. 2020-03-01
##  4 2020  Apr     991. 2020-04-01
##  5 2020  May     848. 2020-05-01
##  6 2020  Jun     848. 2020-06-01
##  7 2020  Jul     876. 2020-07-01
##  8 2020  Aug     836. 2020-08-01
##  9 2020  Sep     793. 2020-09-01
## 10 2020  Oct     861. 2020-10-01
## # ℹ 38 more rows
# ===========================================
# 4️⃣ Pembagian Data Training & Testing
# ===========================================

# Buat objek time series (bulanan)
ts_ekspor <- ts(data$Ekspor, start = c(2020, 1), frequency = 12)

# Split data:
train <- window(ts_ekspor, end = c(2023, 12))  # 2020–2023
test  <- window(ts_ekspor, start = c(2024, 1)) # 2024

# Visualisasi pembagian data
autoplot(ts_ekspor) +
  autolayer(train, series = "Training", color = "blue") +
  autolayer(test, series = "Testing", color = "red") +
  ggtitle("Pembagian Data Training (2020–2023) dan Testing (2024)") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# ===========================================
# 5️⃣ Statistika Deskriptif
# ===========================================

# Hitung ukuran pemusatan & penyebaran
stat_deks <- data %>%
  summarise(
    Mean   = mean(Ekspor, na.rm = TRUE),
    Median = median(Ekspor, na.rm = TRUE),
    SD     = sd(Ekspor, na.rm = TRUE),
    Min    = min(Ekspor, na.rm = TRUE),
    Max    = max(Ekspor, na.rm = TRUE)
  )

# Tampilkan hasil
stat_deks
## # A tibble: 1 × 5
##    Mean Median    SD   Min   Max
##   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 1898.  1908.  699.  793. 3404.
# ===========================================
# 6️⃣ Visualisasi Time Series dengan Garis Tren
# ===========================================

library(scales)  # untuk fungsi date_breaks & date_labels

ggplot(data, aes(x = Tanggal, y = Ekspor)) +
  geom_line(color = "black", linewidth = 0.5) +            # garis utama
  geom_point(color = "darkred", size = 1.5) +              # titik data
  geom_smooth(method = "loess", se = FALSE,                # garis tren LOESS
              color = "orange", linewidth = 1) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") + # tampilkan tahun di sumbu X
  ggtitle("Nilai Ekspor Bulanan Indonesia (2020–2024)") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# ===========================================
# 7️⃣ Visualisasi Boxplot per Tahun & per Bulan
# ===========================================

# a. Boxplot per Tahun
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
  geom_boxplot() +
  ggtitle("Sebaran Nilai Ekspor per Tahun (2020–2024)") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# b. Boxplot per Bulan
ggplot(data, aes(x = Bulan, y = Ekspor, fill = Bulan)) +
  geom_boxplot() +
  ggtitle("Sebaran Nilai Ekspor per Bulan (2020–2024)") +
  xlab("Bulan") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ===========================================
# 8️⃣ Deteksi Outlier per Tahun dengan Boxplot
# ===========================================

# a. Deteksi outlier per tahun dengan metode IQR
outlier_check <- data %>%
  group_by(Tahun) %>%
  mutate(
    Q1 = quantile(Ekspor, 0.25, na.rm = TRUE),
    Q3 = quantile(Ekspor, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    Lower = Q1 - 1.5 * IQR,
    Upper = Q3 + 1.5 * IQR,
    Outlier = ifelse(Ekspor < Lower | Ekspor > Upper, TRUE, FALSE)
  )

# b. Simpan data yang termasuk outlier
outlier_values <- outlier_check %>% filter(Outlier == TRUE)
outlier_values
## # A tibble: 2 × 10
## # Groups:   Tahun [1]
##   Tahun Bulan Ekspor Tanggal       Q1    Q3   IQR Lower Upper Outlier
##   <chr> <fct>  <dbl> <date>     <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>  
## 1 2022  Jan    1011. 2022-01-01 2651. 3141.  490. 1917. 3875. TRUE   
## 2 2022  Feb    1838. 2022-02-01 2651. 3141.  490. 1917. 3875. TRUE
# c. Visualisasi boxplot per tahun untuk mendeteksi outlier
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 19, outlier.size = 2.5) +
  ggtitle("Deteksi Outlier Nilai Ekspor per Tahun (2020–2024)") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 0, vjust = 0.5),
    legend.position = "none"
  )

# d. (Opsional) Visualisasi data outlier dengan label
ggplot(outlier_values, aes(x = Tahun, y = Ekspor)) +
  geom_point(color = "red", size = 3) +
  geom_text(aes(label = round(Ekspor, 2)), vjust = -1, color = "black", size = 3) +
  ggtitle("Nilai Outlier per Tahun pada Data Ekspor") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# ===========================================
# 9️⃣ Uji Stasioneritas (ADF Test) – Untuk ARIMA
# ===========================================

library(tseries)

adf_result <- adf.test(ts_ekspor)
print(adf_result)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_ekspor
## Dickey-Fuller = -1.3898, Lag order = 3, p-value = 0.8214
## alternative hypothesis: stationary
# Jika belum stasioner → differencing 1x 
if(adf_result$p.value > 0.05){
  data_diff <- diff(ts_ekspor, differences = 1)
  cat("\nData belum stasioner → dilakukan differencing ke-1.\n")
  print(adf.test(data_diff))
} else {
  data_diff <- ts_ekspor
  
}
## 
## Data belum stasioner → dilakukan differencing ke-1.
## 
##  Augmented Dickey-Fuller Test
## 
## data:  data_diff
## Dickey-Fuller = -3.4867, Lag order = 3, p-value = 0.05083
## alternative hypothesis: stationary
# Plot ACF & PACF 
par(mfrow=c(1,2))
acf(data_diff, main="ACF Data Setelah Differencing")
pacf(data_diff, main="PACF Data Setelah Differencing")

par(mfrow=c(1,1))
# ===========================================
# 🔟 Identifikasi Pola Time Series
# ===========================================

dekomposisi <- decompose(ts_ekspor, type = "additive")

autoplot(dekomposisi) +
  ggtitle("Dekomposisi Time Series Ekspor (Trend, Seasonal, Residual)") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the forecast package.
##   Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ===========================================
# 11 Pemodelan Double Exponential Smoothing (Holt’s Method)
# ===========================================

# Model Holt (ETS(A,A,N))
model_ets <- ets(train, model = "AAN")

# Ringkasan model
summary(model_ets)
## ETS(A,A,N) 
## 
## Call:
## ets(y = train, model = "AAN")
## 
##   Smoothing parameters:
##     alpha = 0.9792 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 1095.8582 
##     b = 25.6961 
## 
##   sigma:  352.7499
## 
##      AIC     AICc      BIC 
## 754.7540 756.1826 764.1100 
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -7.719492 337.7324 230.7804 -2.314159 12.87525 0.2439897
##                    ACF1
## Training set 0.08142485
# Plot fitted vs data training
autoplot(train, series = "Data Training") +
  autolayer(fitted(model_ets), series = "Fitted Holt", linewidth = 1, color = "blue") +
  ggtitle("Model Holt’s Linear Trend (ETS AAN)") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# Forecast 12 bulan
forecast_ets <- forecast(model_ets, h = 12)

# Plot forecast
autoplot(forecast_ets) +
  autolayer(test, series = "Data Aktual (2024)", linewidth = 1, color = "red") +
  ggtitle("Forecast Ekspor 2024 Menggunakan Holt’s Method") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# Akurasi model
accuracy(forecast_ets$mean, test)
##                 ME     RMSE      MAE       MPE     MAPE        ACF1 Theil's U
## Test set -287.6418 338.0984 292.5585 -16.65445 16.89153 -0.08531764    1.1516
# ===========================================
# 12 Pemodelan Regresi Time Series (Trend Analysis)
# ===========================================

# 1. Buat variabel waktu
data$t  <- 1:nrow(data)      # indeks waktu
data$t2 <- data$t^2          # kuadratik

# Tampilkan tabel tanggal + t
head(data[, c("Tanggal", "Ekspor", "t", "t2")])
## # A tibble: 6 × 4
##   Tanggal    Ekspor     t    t2
##   <date>      <dbl> <int> <dbl>
## 1 2020-01-01  1096.     1     1
## 2 2020-02-01  1058      2     4
## 3 2020-03-01  1177.     3     9
## 4 2020-04-01   991.     4    16
## 5 2020-05-01   848.     5    25
## 6 2020-06-01   848.     6    36
View(data[, c("Tanggal", "Ekspor", "t", "t2")])  

# -------------------------------------------
# MODEL REGRESI LINEAR
# -------------------------------------------
model_lin <- lm(Ekspor ~ t, data = data)

# Cetak output R console
print(summary(model_lin))
## 
## Call:
## lm(formula = Ekspor ~ t, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -817.8 -444.4 -263.3  448.1 1497.9 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1394.28     168.00   8.299 1.93e-11 ***
## t              16.53       4.79   3.450  0.00105 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 642.6 on 58 degrees of freedom
## Multiple R-squared:  0.1703, Adjusted R-squared:  0.156 
## F-statistic:  11.9 on 1 and 58 DF,  p-value: 0.001051
# Prediksi Linear
data$Prediksi_Lin <- predict(model_lin)

# Plot model linear
ggplot(data, aes(x = Tanggal)) +
  geom_line(aes(y = Ekspor), color = "black", linewidth = 0.6) +
  geom_line(aes(y = Prediksi_Lin), color = "blue", linewidth = 1) +
  ggtitle("Regresi Time Series: Tren Linear") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# -------------------------------------------
# MODEL REGRESI KUADRATIK
# -------------------------------------------
model_quad <- lm(Ekspor ~ t + t2, data = data)

# Cetak output R console
print(summary(model_quad))
## 
## Call:
## lm(formula = Ekspor ~ t + t2, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1235.7  -364.0   -77.5   323.0  1010.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 368.0623   187.6772   1.961   0.0548 .  
## t           115.8373    14.1962   8.160 3.73e-11 ***
## t2           -1.6281     0.2256  -7.217 1.38e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 468.5 on 57 degrees of freedom
## Multiple R-squared:  0.5665, Adjusted R-squared:  0.5513 
## F-statistic: 37.24 on 2 and 57 DF,  p-value: 4.518e-11
# Prediksi Kuadratik
data$Prediksi_Quad <- predict(model_quad)

# Plot model kuadratik
ggplot(data, aes(x = Tanggal)) +
  geom_line(aes(y = Ekspor), color = "black", linewidth = 0.6) +
  geom_line(aes(y = Prediksi_Quad), color = "red", linewidth = 1) +
  ggtitle("Regresi Time Series: Tren Kuadratik") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# -------------------------------------------
# PERBANDINGAN LINEAR vs KUADRATIK
# -------------------------------------------
library(Metrics)
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
hasil_perbandingan <- data.frame(
  Model = c("Linear", "Kuadratik"),
  R_Squared = c(summary(model_lin)$r.squared,
                summary(model_quad)$r.squared),
  MAPE = c(mape(data$Ekspor, data$Prediksi_Lin),
           mape(data$Ekspor, data$Prediksi_Quad))
)

print(hasil_perbandingan)
##       Model R_Squared      MAPE
## 1    Linear 0.1702809 0.3076634
## 2 Kuadratik 0.5664745 0.2282671
# ===========================================
# 13 Pemodelan ARIMA (Non-Seasonal)
# ===========================================

# Gunakan training data
train_arima <- train    

# Identifikasi model terbaik
model_arima <- auto.arima(train_arima,
                          seasonal = FALSE)  # Tidak memakai SARIMA

# Ringkasan model
summary(model_arima)
## Series: train_arima 
## ARIMA(0,1,0) 
## 
## sigma^2 = 116411:  log likelihood = -340.81
## AIC=683.63   AICc=683.72   BIC=685.48
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE    MASE       ACF1
## Training set 18.17596 337.6178 226.6472 -0.6500162 12.50883 0.23962 0.06655587
# Diagnostik residual
checkresiduals(model_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 18.422, df = 10, p-value = 0.04826
## 
## Model df: 0.   Total lags used: 10
# Peramalan 12 bulan (2024)
forecast_arima <- forecast(model_arima, h = 12)

# Plot hasil peramalan
autoplot(forecast_arima) +
  autolayer(test, series = "Data Aktual 2024", color = "red", linewidth = 1) +
  ggtitle("Forecast Ekspor 2024 dengan ARIMA Non-Seasonal") +
  xlab("Tahun") +
  ylab("Nilai Ekspor (Juta USD)") +
  theme_minimal()

# Hitung akurasi model
accuracy(forecast_arima$mean, test)
## [1] 0