# ===========================================
# 1️⃣ Load Library
# ===========================================
library(readxl)
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TTR)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tseries)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# ===========================================
# 2️⃣ Baca Data
# ===========================================
data <- read_excel("C:/Users/User/Downloads/data arw ekspor.xlsx")
# Ubah nama kolom agar aman
colnames(data) <- c("Tahun", "Bulan", "Ekspor")
# Pastikan kolom Ekspor numeric
data$Ekspor <- gsub("\\.", "", data$Ekspor)
data$Ekspor <- gsub(",", ".", data$Ekspor)
data$Ekspor <- as.numeric(data$Ekspor)
# Buat kolom tanggal
data$Tanggal <- seq(as.Date("2020-01-01"), by = "month", length.out = nrow(data))
data$Tahun <- format(data$Tanggal, "%Y")
data$Bulan <- format(data$Tanggal, "%b")
data$Bulan <- factor(data$Bulan, levels = month.abb)
# ===========================================
# 3️⃣ Cek Struktur Data
# ===========================================
str(data)
## tibble [60 × 4] (S3: tbl_df/tbl/data.frame)
## $ Tahun : chr [1:60] "2020" "2020" "2020" "2020" ...
## $ Bulan : Factor w/ 12 levels "Jan","Feb","Mar",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Ekspor : num [1:60] 1096 1058 1177 991 848 ...
## $ Tanggal: Date[1:60], format: "2020-01-01" "2020-02-01" ...
summary(data)
## Tahun Bulan Ekspor Tanggal
## Length:60 Jan : 5 Min. : 793.4 Min. :2020-01-01
## Class :character Feb : 5 1st Qu.:1374.9 1st Qu.:2021-03-24
## Mode :character Mar : 5 Median :1908.2 Median :2022-06-16
## Apr : 5 Mean :1898.3 Mean :2022-06-16
## May : 5 3rd Qu.:2237.2 3rd Qu.:2023-09-08
## Jun : 5 Max. :3404.5 Max. :2024-12-01
## (Other):30
colSums(is.na(data))
## Tahun Bulan Ekspor Tanggal
## 0 0 0 0
#Pemisahan Data Training (2020–2023) & Testing (2024)
data_train <- data %>% filter(Tahun %in% c("2020", "2021", "2022", "2023"))
data_train
## # A tibble: 48 × 4
## Tahun Bulan Ekspor Tanggal
## <chr> <fct> <dbl> <date>
## 1 2020 Jan 1096. 2020-01-01
## 2 2020 Feb 1058 2020-02-01
## 3 2020 Mar 1177. 2020-03-01
## 4 2020 Apr 991. 2020-04-01
## 5 2020 May 848. 2020-05-01
## 6 2020 Jun 848. 2020-06-01
## 7 2020 Jul 876. 2020-07-01
## 8 2020 Aug 836. 2020-08-01
## 9 2020 Sep 793. 2020-09-01
## 10 2020 Oct 861. 2020-10-01
## # ℹ 38 more rows
# ===========================================
# 4️⃣ Pembagian Data Training & Testing
# ===========================================
# Buat objek time series (bulanan)
ts_ekspor <- ts(data$Ekspor, start = c(2020, 1), frequency = 12)
# Split data:
train <- window(ts_ekspor, end = c(2023, 12)) # 2020–2023
test <- window(ts_ekspor, start = c(2024, 1)) # 2024
# Visualisasi pembagian data
autoplot(ts_ekspor) +
autolayer(train, series = "Training", color = "blue") +
autolayer(test, series = "Testing", color = "red") +
ggtitle("Pembagian Data Training (2020–2023) dan Testing (2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# ===========================================
# 5️⃣ Statistika Deskriptif
# ===========================================
# Hitung ukuran pemusatan & penyebaran
stat_deks <- data %>%
summarise(
Mean = mean(Ekspor, na.rm = TRUE),
Median = median(Ekspor, na.rm = TRUE),
SD = sd(Ekspor, na.rm = TRUE),
Min = min(Ekspor, na.rm = TRUE),
Max = max(Ekspor, na.rm = TRUE)
)
# Tampilkan hasil
stat_deks
## # A tibble: 1 × 5
## Mean Median SD Min Max
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1898. 1908. 699. 793. 3404.
# ===========================================
# 6️⃣ Visualisasi Time Series dengan Garis Tren
# ===========================================
library(scales) # untuk fungsi date_breaks & date_labels
ggplot(data, aes(x = Tanggal, y = Ekspor)) +
geom_line(color = "black", linewidth = 0.5) + # garis utama
geom_point(color = "darkred", size = 1.5) + # titik data
geom_smooth(method = "loess", se = FALSE, # garis tren LOESS
color = "orange", linewidth = 1) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") + # tampilkan tahun di sumbu X
ggtitle("Nilai Ekspor Bulanan Indonesia (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# ===========================================
# 7️⃣ Visualisasi Boxplot per Tahun & per Bulan
# ===========================================
# a. Boxplot per Tahun
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
geom_boxplot() +
ggtitle("Sebaran Nilai Ekspor per Tahun (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# b. Boxplot per Bulan
ggplot(data, aes(x = Bulan, y = Ekspor, fill = Bulan)) +
geom_boxplot() +
ggtitle("Sebaran Nilai Ekspor per Bulan (2020–2024)") +
xlab("Bulan") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ===========================================
# 8️⃣ Deteksi Outlier per Tahun dengan Boxplot
# ===========================================
# a. Deteksi outlier per tahun dengan metode IQR
outlier_check <- data %>%
group_by(Tahun) %>%
mutate(
Q1 = quantile(Ekspor, 0.25, na.rm = TRUE),
Q3 = quantile(Ekspor, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
Lower = Q1 - 1.5 * IQR,
Upper = Q3 + 1.5 * IQR,
Outlier = ifelse(Ekspor < Lower | Ekspor > Upper, TRUE, FALSE)
)
# b. Simpan data yang termasuk outlier
outlier_values <- outlier_check %>% filter(Outlier == TRUE)
outlier_values
## # A tibble: 2 × 10
## # Groups: Tahun [1]
## Tahun Bulan Ekspor Tanggal Q1 Q3 IQR Lower Upper Outlier
## <chr> <fct> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 2022 Jan 1011. 2022-01-01 2651. 3141. 490. 1917. 3875. TRUE
## 2 2022 Feb 1838. 2022-02-01 2651. 3141. 490. 1917. 3875. TRUE
# c. Visualisasi boxplot per tahun untuk mendeteksi outlier
ggplot(data, aes(x = Tahun, y = Ekspor, fill = Tahun)) +
geom_boxplot(outlier.color = "red", outlier.shape = 19, outlier.size = 2.5) +
ggtitle("Deteksi Outlier Nilai Ekspor per Tahun (2020–2024)") +
xlab("Tahun") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 0, vjust = 0.5),
legend.position = "none"
)

# d. (Opsional) Visualisasi data outlier dengan label
ggplot(outlier_values, aes(x = Tahun, y = Ekspor)) +
geom_point(color = "red", size = 3) +
geom_text(aes(label = round(Ekspor, 2)), vjust = -1, color = "black", size = 3) +
ggtitle("Nilai Outlier per Tahun pada Data Ekspor") +
ylab("Nilai Ekspor (Juta USD)") +
theme_minimal()

# ===========================================
#9️⃣ Uji Stasioneritas (ADF Test)
# ===========================================
# Uji stasioneritas awal menggunakan Augmented Dickey-Fuller Test
library(tseries)
adf_result <- adf.test(ts_ekspor)
print(adf_result)
##
## Augmented Dickey-Fuller Test
##
## data: ts_ekspor
## Dickey-Fuller = -1.3898, Lag order = 3, p-value = 0.8214
## alternative hypothesis: stationary
# Interpretasi awal:
# Jika p-value > 0.05 → data belum stasioner
# Jika p-value ≤ 0.05 → data sudah stasioner
# Jika p-value > 0.05, lakukan differencing ke-1
if (adf_result$p.value > 0.05) {
diff1 <- diff(ts_ekspor, differences = 1)
cat("\nData belum stasioner → dilakukan differencing ke-1\n")
adf_result_diff1 <- adf.test(diff1)
print(adf_result_diff1)
# Jika setelah differencing ke-1 masih belum stasioner → differencing ke-2
if (adf_result_diff1$p.value > 0.05) {
diff2 <- diff(ts_ekspor, differences = 2)
cat("\nMasih belum stasioner → dilakukan differencing ke-2\n")
adf_result_diff2 <- adf.test(diff2)
print(adf_result_diff2)
data_final <- diff2
cat("\nData akhir yang digunakan: differencing ke-2\n")
} else {
data_final <- diff1
cat("\nData akhir yang digunakan: differencing ke-1\n")
}
} else {
data_final <- ts_ekspor
cat("\nData sudah stasioner → tidak perlu differencing\n")
}
##
## Data belum stasioner → dilakukan differencing ke-1
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -3.4867, Lag order = 3, p-value = 0.05083
## alternative hypothesis: stationary
##
##
## Masih belum stasioner → dilakukan differencing ke-2
## Warning in adf.test(diff2): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff2
## Dickey-Fuller = -5.6787, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
##
##
## Data akhir yang digunakan: differencing ke-2
# Plot ACF dan PACF dari data_final
par(mfrow = c(1, 2))
acf(data_final, main = "Plot ACF Data Akhir (Stasioner)")
pacf(data_final, main = "Plot PACF Data Akhir (Stasioner)")

par(mfrow = c(1, 1))