library(readxl)
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(tseries)
library(caTools)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.2
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: lattice
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.4.2
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
## The following object is masked from 'package:forecast':
##
## accuracy
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.4.2
##
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
## The following object is masked from 'package:base':
##
## Recall
ecommerss1 <- read_xlsx("C:/Users/ASUS/Documents/ecommerss.xlsx")
## New names:
## • `` -> `...1`
print(ecommerss1)
## # A tibble: 34 × 3
## ...1 Total Ecommerce
## <chr> <dbl> <dbl>
## 1 2nd quarter 2024 1826928 291647
## 2 1st quarter 2024 1818701 287855
## 3 4th quarter 2023 1821361 283293
## 4 3rd quarter 2023 1811793 279739
## 5 2nd quarter 2023 1789656 273354
## 6 1st quarter 2023 1792376 266314
## 7 4th quarter 2022 1765897 257560
## 8 3rd quarter 2022 1768920 255583
## 9 2nd quarter 2022 1774783 251463
## 10 1st quarter 2022 1731846 247609
## # ℹ 24 more rows
data.ecommers2 <- ecommerss1[nrow(ecommerss1):1,]
print(data.ecommers2)
## # A tibble: 34 × 3
## ...1 Total Ecommerce
## <chr> <dbl> <dbl>
## 1 1st quarter 2016 1188574 91394
## 2 2nd quarter 2016 1201972 94320
## 3 3rd quarter 2016 1213119 97406
## 4 4th quarter 2016 1225283 100362
## 5 1st quarter 2017 1246092 104572
## 6 2nd quarter 2017 1247121 108452
## 7 3rd quarter 2017 1259520 112022
## 8 4th quarter 2017 1290945 117309
## 9 1st quarter 2018 1297427 121728
## 10 2nd quarter 2018 1312851 125311
## # ℹ 24 more rows
tahun <- as.numeric(substr(data.ecommers2$...1, nchar(data.ecommers2$...1) - 3, nchar(data.ecommers2$...1)))
kuartal <- ifelse(substr(data.ecommers2$...1, 1, 3) == "1st", 1,
ifelse(substr(data.ecommers2$...1, 1, 3) == "2nd", 2,
ifelse(substr(data.ecommers2$...1, 1, 3) == "3rd", 3, 4)))
data.ecommers2$X <- tahun * 100 + kuartal
print(data.ecommers2)
## # A tibble: 34 × 4
## ...1 Total Ecommerce X
## <chr> <dbl> <dbl> <dbl>
## 1 1st quarter 2016 1188574 91394 201601
## 2 2nd quarter 2016 1201972 94320 201602
## 3 3rd quarter 2016 1213119 97406 201603
## 4 4th quarter 2016 1225283 100362 201604
## 5 1st quarter 2017 1246092 104572 201701
## 6 2nd quarter 2017 1247121 108452 201702
## 7 3rd quarter 2017 1259520 112022 201703
## 8 4th quarter 2017 1290945 117309 201704
## 9 1st quarter 2018 1297427 121728 201801
## 10 2nd quarter 2018 1312851 125311 201802
## # ℹ 24 more rows
data_new <- data.ecommers2[, -1]
data.new <- data_new[, -1]
print(data.new)
## # A tibble: 34 × 2
## Ecommerce X
## <dbl> <dbl>
## 1 91394 201601
## 2 94320 201602
## 3 97406 201603
## 4 100362 201604
## 5 104572 201701
## 6 108452 201702
## 7 112022 201703
## 8 117309 201704
## 9 121728 201801
## 10 125311 201802
## # ℹ 24 more rows
Ecommerce <-c(91394, 94320, 97406, 100362, 104572, 108452, 112022, 117309, 121728, 125311, 128056, 131610, 132435, 137954, 146362, 152673, 159268, 211804, 215476, 220552, 231650, 239897, 235241, 243755, 247609, 251463, 255583, 257560, 266314, 273354, 279739, 283293, 287855, 291647)
X <-c(201601, 201602, 201603, 201604, 201701, 201702, 201703, 201704, 201801, 201802, 201803, 201804, 201901, 201902, 201903, 201904, 202001, 202002, 202003, 202004, 202101, 202102, 202103, 202104, 202201, 202202, 202203, 202204, 202301, 202302, 202303, 202304, 202401, 202402)
dframe <- data.frame(
Periode = X,
Ecommerce = Ecommerce
)
print(dframe)
## Periode Ecommerce
## 1 201601 91394
## 2 201602 94320
## 3 201603 97406
## 4 201604 100362
## 5 201701 104572
## 6 201702 108452
## 7 201703 112022
## 8 201704 117309
## 9 201801 121728
## 10 201802 125311
## 11 201803 128056
## 12 201804 131610
## 13 201901 132435
## 14 201902 137954
## 15 201903 146362
## 16 201904 152673
## 17 202001 159268
## 18 202002 211804
## 19 202003 215476
## 20 202004 220552
## 21 202101 231650
## 22 202102 239897
## 23 202103 235241
## 24 202104 243755
## 25 202201 247609
## 26 202202 251463
## 27 202203 255583
## 28 202204 257560
## 29 202301 266314
## 30 202302 273354
## 31 202303 279739
## 32 202304 283293
## 33 202401 287855
## 34 202402 291647
set.seed(123)
split <- sample.split(dframe$Ecommerce, SplitRatio = 0.75)
data_train <- subset(dframe, split == TRUE)
data_test <- subset(dframe, split == FALSE)
nrow(data_train)
## [1] 25
nrow(data_test)
## [1] 9
head(data_train)
## Periode Ecommerce
## 1 201601 91394
## 2 201602 94320
## 3 201603 97406
## 4 201604 100362
## 6 201702 108452
## 7 201703 112022
head(data_test)
## Periode Ecommerce
## 5 201701 104572
## 8 201704 117309
## 11 201803 128056
## 16 201904 152673
## 20 202004 220552
## 21 202101 231650
Data dibagi menjadi dua data, yaitu data training dan data testing. Data training yang digunakan untuk membuat model dengan proporsi data 0,75 artinya 75% dari data akan digunakan untuk melatih model dan 25% untuk menguji model
tss_data <- ts(data.ecommers2$`Ecommerce`, frequency=4, start=c(2016,1))
print(tss_data)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2016 91394 94320 97406 100362
## 2017 104572 108452 112022 117309
## 2018 121728 125311 128056 131610
## 2019 132435 137954 146362 152673
## 2020 159268 211804 215476 220552
## 2021 231650 239897 235241 243755
## 2022 247609 251463 255583 257560
## 2023 266314 273354 279739 283293
## 2024 287855 291647
decomp <- decompose(tss_data)
plot(decomp$trend, main="Tren Jangka Panjang E-commerce", ylab="Nilai", xlab="Tahun")
Grafik tersebut menunjukkan tren naik, karena nilai e-commerce cenderung
meningkat dari tahun ke tahun. Terutama pada tahun 2020, tren naik cukup
signifikan.
seasonal.plot <- ggseasonplot(tss_data, main="Pola Musiman E-commerce",
year.labels=TRUE, year.labels.left=TRUE) +
ylab("Nilai") + xlab("Kuartal")
print(seasonal.plot)
adf_test <- adf.test(tss_data)
print("Hasil Uji Augmented Dickey-Fuller:")
## [1] "Hasil Uji Augmented Dickey-Fuller:"
print(adf_test)
##
## Augmented Dickey-Fuller Test
##
## data: tss_data
## Dickey-Fuller = -2.2762, Lag order = 3, p-value = 0.466
## alternative hypothesis: stationary
P-value digunakan untuk mengambil keputusan dari hipotesis. Hipotesis altenatif yang diuji adalah bahwa data tersebut stasioner. 0,466 > 0,05 maka gagal menolak hipotesis nol. Data ’tss_data” tidak dapat dikatakan stasioner
decomposisi <- decompose(tss_data)
plot(decomposisi)
Kemiringan tren meningkat lebih tajam setelah tahun 2020. Pola musiman
konsisten karena menunjukkan adanya siklus yang berulang secara
teratur.
acf(tss_data, main="ACF Plot", lag.max=20)
pacf(tss_data, main="PACF Plot", lag.max=20)
Sebagian besar batang pada grafik ACF berada di dalam batas kepercayaan,
yang menunjukkan tidak ada autokorelasi yang signifikan pada berbagai
lag. Pada grafik PACF autokorelasi parsial signifikan pada lag
tersebut
modell.sarima <- auto.arima(tss_data, seasonal=TRUE)
summary(modell.sarima)
## Series: tss_data
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 6068.273
## s.e. 1506.888
##
## sigma^2 = 77275136: log likelihood = -346
## AIC=696.01 AICc=696.41 BIC=699
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.509579 8528.162 3720.467 -0.3277412 2.073663 0.1470439
## ACF1
## Training set -0.01814832
MAPE 2,07% menunjukkan eror prediksi rata-rata yang cukup baik. ACF1 -0,0181 (mendekati 0) menunjukkan residual tidak berkorelasi. Berdasarkan signifikan diatas, disimpulkan bahwa data tersebut memiliki tren pertumbuhan yang kuat. MPE negatif menunjukkan sedikit kecenderungan over-forecasting. Sehingga model ini cocok untuk forecasting karena eror metrics yang rendah.
forecast <- forecast(modell.sarima, h=8)
print(forecast)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2024 Q3 297715.3 286449.6 308980.9 280486.0 314944.6
## 2024 Q4 303783.5 287851.5 319715.6 279417.6 328149.5
## 2025 Q1 309851.8 290339.2 329364.5 280009.8 339693.9
## 2025 Q2 315920.1 293388.8 338451.4 281461.5 350378.7
## 2025 Q3 321988.4 296797.6 347179.1 283462.4 360514.3
## 2025 Q4 328056.6 300461.6 355651.7 285853.6 370259.7
## 2026 Q1 334124.9 304318.8 363931.0 288540.4 379709.4
## 2026 Q2 340193.2 308329.1 372057.2 291461.3 388925.0
plot(forecast, main="Forecast E-commerce")
Berdasarkan grafik, penjualan e-commerce diproyeksikan akan terus tumbuh
pesat dalam beberapa tahun kedepan. Pada tahun 2020 terjadi percepatan
pertumbuhan yang cukup tajam.
MPE.sarima <- mean(abs((forecast$mean - tss_data[length(tss_data)]) / tss_data[length(tss_data)]) * 100)
print(MPE.sarima)
## [1] 9.363109
model_svr <- svm(Ecommerce ~ .,data_train, type = 'eps-regression', kernel = 'linear')
summary(model_svr)
##
## Call:
## svm(formula = Ecommerce ~ ., data = data_train, type = "eps-regression",
## kernel = "linear")
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 1
## gamma: 1
## epsilon: 0.1
##
##
## Number of Support Vectors: 18
coef(model_svr)
## (Intercept) Periode
## 0.02703209 0.96366373
prediksi.test <- predict(model_svr, newdata = data_test)
tahun_awal <- 2024
jumlah_periode <- 10
periode <- seq(3, jumlah_periode, by = 1)
tahun <- tahun_awal + floor((periode - 1) / 4)
angka_periode <- ((periode - 1) %% 4) + 1
periode_baru <- tahun * 100 + angka_periode
prediksi.periode <- data.frame(Periode = periode_baru)
print(prediksi.periode)
## Periode
## 1 202403
## 2 202404
## 3 202501
## 4 202502
## 5 202503
## 6 202504
## 7 202601
## 8 202602
hasil.predict <- predict(model_svr, newdata = prediksi.periode)
print(hasil.predict)
## 1 2 3 4 5 6 7 8
## 299060.3 299324.9 324984.3 325248.9 325513.4 325777.9 351437.4 351701.9
hasil_prediksi <- rbind(dframe, data.frame(Periode = prediksi.periode$Periode, Ecommerce = hasil.predict))
hasil_prediksi$tipe <- ifelse(hasil_prediksi$Periode <= max(dframe$Periode), "Aktual", "Prediksi")
plot <- ggplot(hasil_prediksi, aes(x = Periode, y = Ecommerce, color = tipe)) +
geom_line() +
geom_point() +
theme_minimal() +
labs(title = "Forecast E-commerce menggunakan SVR",
x = "Periode",
y = "Nilai E-commerce",
color = "Keterangan") +
scale_color_manual(values = c("Data Aktual" = "blue", "Prediksi" = "red")) +
theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
axis.title = element_text(size = 12),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10))
show(plot)
actual_values <- dframe$Periode
predicted_values <- dframe$Ecommerce
MPE.svr <- mean((actual_values - predicted_values) / actual_values * 100)
cat("Mean Percentage Error (MPE):", MPE.svr, "%")
## Mean Percentage Error (MPE): 7.514559 %
MPE.sarima
## [1] 9.363109
MPE.svr
## [1] 7.514559
KESIMPULAN Setelah dilakukan forecast dengan menggunakan dua metode, akan diamati nilai MPE dari masing-masing metode. Metode dengan nilai RMSE paling kecil adalah metode terbaik