# ===========================================
# 1️⃣ Load Library
# ===========================================
library(readxl)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TTR)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tseries)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# ===========================================
# 2️⃣ Baca Data
# ===========================================
data <- read_excel("C:/Users/User/Downloads/data arw ekspor.xlsx")
# Ubah nama kolom agar aman
colnames(data) <- c("Tahun", "Bulan", "Ekspor")
# Pastikan kolom Ekspor numeric
data$Ekspor <- gsub("\\.", "", data$Ekspor)
data$Ekspor <- gsub(",", ".", data$Ekspor)
data$Ekspor <- as.numeric(data$Ekspor)
# Buat kolom tanggal
data$Tanggal <- seq(as.Date("2020-01-01"), by = "month", length.out = nrow(data))
data$Tahun <- format(data$Tanggal, "%Y")
data$Bulan <- format(data$Tanggal, "%b")
data$Bulan <- factor(data$Bulan, levels = month.abb)
# ===========================================
# 3️⃣ Cek Struktur Data
# ===========================================
str(data)
## tibble [60 × 4] (S3: tbl_df/tbl/data.frame)
## $ Tahun : chr [1:60] "2020" "2020" "2020" "2020" ...
## $ Bulan : Factor w/ 12 levels "Jan","Feb","Mar",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Ekspor : num [1:60] 1096 1058 1177 991 848 ...
## $ Tanggal: Date[1:60], format: "2020-01-01" "2020-02-01" ...
summary(data)
## Tahun Bulan Ekspor Tanggal
## Length:60 Jan : 5 Min. : 793.4 Min. :2020-01-01
## Class :character Feb : 5 1st Qu.:1374.9 1st Qu.:2021-03-24
## Mode :character Mar : 5 Median :1908.2 Median :2022-06-16
## Apr : 5 Mean :1898.3 Mean :2022-06-16
## May : 5 3rd Qu.:2237.2 3rd Qu.:2023-09-08
## Jun : 5 Max. :3404.5 Max. :2024-12-01
## (Other):30
colSums(is.na(data))
## Tahun Bulan Ekspor Tanggal
## 0 0 0 0
#Pemisahan Data Training (2020–2023) & Testing (2024)
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_train
## # A tibble: 48 × 4
## Tahun Bulan Ekspor Tanggal
## <chr> <fct> <dbl> <date>
## 1 2020 Jan 1096. 2020-01-01
## 2 2020 Feb 1058 2020-02-01
## 3 2020 Mar 1177. 2020-03-01
## 4 2020 Apr 991. 2020-04-01
## 5 2020 May 848. 2020-05-01
## 6 2020 Jun 848. 2020-06-01
## 7 2020 Jul 876. 2020-07-01
## 8 2020 Aug 836. 2020-08-01
## 9 2020 Sep 793. 2020-09-01
## 10 2020 Oct 861. 2020-10-01
## # ℹ 38 more rows
# ===========================================
# 4️⃣ Pembagian Data Training & Testing
# ===========================================
# Buat objek time series (bulanan)
ts_ekspor <- ts(data$Ekspor, start = c(2020, 1), frequency = 12)
# Split data:
train <- window(ts_ekspor, end = c(2023, 12)) # 2020–2023
test <- window(ts_ekspor, start = c(2024, 1)) # 2024
# Visualisasi pembagian data
autoplot(ts_ekspor) +
autolayer(train, series = "Training", color = "blue") +
autolayer(test, series = "Testing", color = "red") +
ggtitle("Pembagian Data Training (2020–2023) dan Testing (2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# ===========================================
# 5️⃣ Statistika Deskriptif
# ===========================================
# Hitung ukuran pemusatan & penyebaran
stat_deks <- data %>%
summarise(
Mean = mean(Ekspor, na.rm = TRUE),
Median = median(Ekspor, na.rm = TRUE),
SD = sd(Ekspor, na.rm = TRUE),
Min = min(Ekspor, na.rm = TRUE),
Max = max(Ekspor, na.rm = TRUE)
)
# Tampilkan hasil
stat_deks
## # A tibble: 1 × 5
## Mean Median SD Min Max
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1898. 1908. 699. 793. 3404.
# ===========================================
# 6️⃣ Visualisasi Time Series dengan Garis Tren
# ===========================================
library(scales) # untuk fungsi date_breaks & date_labels
ggplot(data, aes(x = Tanggal, y = Ekspor)) +
geom_line(color = "black", linewidth = 0.5) + # garis utama
geom_point(color = "darkred", size = 1.5) + # titik data
geom_smooth(method = "loess", se = FALSE, # garis tren LOESS
color = "orange", linewidth = 1) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") + # tampilkan tahun di sumbu X
ggtitle("Nilai Ekspor Bulanan Indonesia (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# ===========================================
# 7️⃣ Visualisasi Boxplot per Tahun & per Bulan
# ===========================================
# a. Boxplot per Tahun
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
geom_boxplot() +
ggtitle("Sebaran Nilai Ekspor per Tahun (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# b. Boxplot per Bulan
ggplot(data, aes(x = Bulan, y = Ekspor, fill = Bulan)) +
geom_boxplot() +
ggtitle("Sebaran Nilai Ekspor per Bulan (2020–2024)") +
xlab("Bulan") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ===========================================
# 8️⃣ Deteksi Outlier per Tahun dengan Boxplot
# ===========================================
# a. Deteksi outlier per tahun dengan metode IQR
outlier_check <- data %>%
group_by(Tahun) %>%
mutate(
Q1 = quantile(Ekspor, 0.25, na.rm = TRUE),
Q3 = quantile(Ekspor, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
Lower = Q1 - 1.5 * IQR,
Upper = Q3 + 1.5 * IQR,
Outlier = ifelse(Ekspor < Lower | Ekspor > Upper, TRUE, FALSE)
)
# b. Simpan data yang termasuk outlier
outlier_values <- outlier_check %>% filter(Outlier == TRUE)
outlier_values
## # A tibble: 2 × 10
## # Groups: Tahun [1]
## Tahun Bulan Ekspor Tanggal Q1 Q3 IQR Lower Upper Outlier
## <chr> <fct> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 2022 Jan 1011. 2022-01-01 2651. 3141. 490. 1917. 3875. TRUE
## 2 2022 Feb 1838. 2022-02-01 2651. 3141. 490. 1917. 3875. TRUE
# c. Visualisasi boxplot per tahun untuk mendeteksi outlier
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
geom_boxplot(outlier.color = "red", outlier.shape = 19, outlier.size = 2.5) +
ggtitle("Deteksi Outlier Nilai Ekspor per Tahun (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 0, vjust = 0.5),
legend.position = "none"
)

# d. (Opsional) Visualisasi data outlier dengan label
ggplot(outlier_values, aes(x = Tahun, y = Ekspor)) +
geom_point(color = "red", size = 3) +
geom_text(aes(label = round(Ekspor, 2)), vjust = -1, color = "black", size = 3) +
ggtitle("Nilai Outlier per Tahun pada Data Ekspor") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# ===========================================
# 9️⃣ Uji Stasioneritas (ADF Test) – Untuk ARIMA
# ===========================================
library(tseries)
adf_result <- adf.test(ts_ekspor)
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: ts_ekspor
## Dickey-Fuller = -1.3898, Lag order = 3, p-value = 0.8214
## alternative hypothesis: stationary
# Jika belum stasioner → differencing 1x
if(adf_result$p.value > 0.05){
data_diff <- diff(ts_ekspor, differences = 1)
cat("\nData belum stasioner → dilakukan differencing ke-1.\n")
print(adf.test(data_diff))
} else {
data_diff <- ts_ekspor
}
##
## Data belum stasioner → dilakukan differencing ke-1.
##
## Augmented Dickey-Fuller Test
##
## data: data_diff
## Dickey-Fuller = -3.4867, Lag order = 3, p-value = 0.05083
## alternative hypothesis: stationary
# Plot ACF & PACF
par(mfrow=c(1,2))
acf(data_diff, main="ACF Data Setelah Differencing")
pacf(data_diff, main="PACF Data Setelah Differencing")

par(mfrow=c(1,1))
# ===========================================
# 🔟 Identifikasi Pola Time Series
# ===========================================
dekomposisi <- decompose(ts_ekspor, type = "additive")
autoplot(dekomposisi) +
ggtitle("Dekomposisi Time Series Ekspor (Trend, Seasonal, Residual)") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the forecast package.
## Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ===========================================
# 11 Pemodelan Double Exponential Smoothing (Holt’s Method)
# ===========================================
# Model Holt (ETS(A,A,N))
model_ets <- ets(train, model = "AAN")
# Ringkasan model
summary(model_ets)
## ETS(A,A,N)
##
## Call:
## ets(y = train, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.9792
## beta = 1e-04
##
## Initial states:
## l = 1095.8582
## b = 25.6961
##
## sigma: 352.7499
##
## AIC AICc BIC
## 754.7540 756.1826 764.1100
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -7.719492 337.7324 230.7804 -2.314159 12.87525 0.2439897
## ACF1
## Training set 0.08142485
# Plot fitted vs data training
autoplot(train, series = "Data Training") +
autolayer(fitted(model_ets), series = "Fitted Holt", linewidth = 1, color = "blue") +
ggtitle("Model Holt’s Linear Trend (ETS AAN)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# Forecast 12 bulan
forecast_ets <- forecast(model_ets, h = 12)
# Plot forecast
autoplot(forecast_ets) +
autolayer(test, series = "Data Aktual (2024)", linewidth = 1, color = "red") +
ggtitle("Forecast Ekspor 2024 Menggunakan Holt’s Method") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# Akurasi model
accuracy(forecast_ets$mean, test)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -287.6418 338.0984 292.5585 -16.65445 16.89153 -0.08531764 1.1516
# ===========================================
# 12 Pemodelan Regresi Time Series (Trend Analysis)
# ===========================================
# 1. Buat variabel waktu
data$t <- 1:nrow(data) # indeks waktu
data$t2 <- data$t^2 # kuadratik
# Tampilkan tabel tanggal + t
head(data[, c("Tanggal", "Ekspor", "t", "t2")])
## # A tibble: 6 × 4
## Tanggal Ekspor t t2
## <date> <dbl> <int> <dbl>
## 1 2020-01-01 1096. 1 1
## 2 2020-02-01 1058 2 4
## 3 2020-03-01 1177. 3 9
## 4 2020-04-01 991. 4 16
## 5 2020-05-01 848. 5 25
## 6 2020-06-01 848. 6 36
View(data[, c("Tanggal", "Ekspor", "t", "t2")])
# -------------------------------------------
# MODEL REGRESI LINEAR
# -------------------------------------------
model_lin <- lm(Ekspor ~ t, data = data)
# Cetak output R console
print(summary(model_lin))
##
## Call:
## lm(formula = Ekspor ~ t, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -817.8 -444.4 -263.3 448.1 1497.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1394.28 168.00 8.299 1.93e-11 ***
## t 16.53 4.79 3.450 0.00105 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 642.6 on 58 degrees of freedom
## Multiple R-squared: 0.1703, Adjusted R-squared: 0.156
## F-statistic: 11.9 on 1 and 58 DF, p-value: 0.001051
# Prediksi Linear
data$Prediksi_Lin <- predict(model_lin)
# Plot model linear
ggplot(data, aes(x = Tanggal)) +
geom_line(aes(y = Ekspor), color = "black", linewidth = 0.6) +
geom_line(aes(y = Prediksi_Lin), color = "blue", linewidth = 1) +
ggtitle("Regresi Time Series: Tren Linear") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# -------------------------------------------
# MODEL REGRESI KUADRATIK
# -------------------------------------------
model_quad <- lm(Ekspor ~ t + t2, data = data)
# Cetak output R console
print(summary(model_quad))
##
## Call:
## lm(formula = Ekspor ~ t + t2, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1235.7 -364.0 -77.5 323.0 1010.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 368.0623 187.6772 1.961 0.0548 .
## t 115.8373 14.1962 8.160 3.73e-11 ***
## t2 -1.6281 0.2256 -7.217 1.38e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 468.5 on 57 degrees of freedom
## Multiple R-squared: 0.5665, Adjusted R-squared: 0.5513
## F-statistic: 37.24 on 2 and 57 DF, p-value: 4.518e-11
# Prediksi Kuadratik
data$Prediksi_Quad <- predict(model_quad)
# Plot model kuadratik
ggplot(data, aes(x = Tanggal)) +
geom_line(aes(y = Ekspor), color = "black", linewidth = 0.6) +
geom_line(aes(y = Prediksi_Quad), color = "red", linewidth = 1) +
ggtitle("Regresi Time Series: Tren Kuadratik") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# -------------------------------------------
# PERBANDINGAN LINEAR vs KUADRATIK
# -------------------------------------------
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
hasil_perbandingan <- data.frame(
Model = c("Linear", "Kuadratik"),
R_Squared = c(summary(model_lin)$r.squared,
summary(model_quad)$r.squared),
MAPE = c(mape(data$Ekspor, data$Prediksi_Lin),
mape(data$Ekspor, data$Prediksi_Quad))
)
print(hasil_perbandingan)
## Model R_Squared MAPE
## 1 Linear 0.1702809 0.3076634
## 2 Kuadratik 0.5664745 0.2282671
# ===========================================
# 13 Pemodelan ARIMA (Non-Seasonal)
# ===========================================
# Gunakan training data
train_arima <- train
# Identifikasi model terbaik
model_arima <- auto.arima(train_arima,
seasonal = FALSE) # Tidak memakai SARIMA
# Ringkasan model
summary(model_arima)
## Series: train_arima
## ARIMA(0,1,0)
##
## sigma^2 = 116411: log likelihood = -340.81
## AIC=683.63 AICc=683.72 BIC=685.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 18.17596 337.6178 226.6472 -0.6500162 12.50883 0.23962 0.06655587
# Diagnostik residual
checkresiduals(model_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 18.422, df = 10, p-value = 0.04826
##
## Model df: 0. Total lags used: 10
# Peramalan 12 bulan (2024)
forecast_arima <- forecast(model_arima, h = 12)
# Plot hasil peramalan
autoplot(forecast_arima) +
autolayer(test, series = "Data Aktual 2024", color = "red", linewidth = 1) +
ggtitle("Forecast Ekspor 2024 dengan ARIMA Non-Seasonal") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# Hitung akurasi model
accuracy(forecast_arima$mean, test)
## [1] 0