Packages

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## Warning: package 'lubridate' was built under R version 4.1.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(e1071)  
## Warning: package 'e1071' was built under R version 4.1.2
library(dplyr) 
library(skimr)
## Warning: package 'skimr' was built under R version 4.1.2
library(DataExplorer)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.1.2
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 4.1.2
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ROSE)
## Loaded ROSE 0.0-4
library(caTools)
library(recipes)
## 
## Attaching package: 'recipes'
## 
## The following object is masked from 'package:stringr':
## 
##     fixed
## 
## The following object is masked from 'package:stats':
## 
##     step
library(caret)
## Warning: package 'caret' was built under R version 4.1.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.1.2
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift

Memanggil Data

setwd("~/Desktop")
Data_Lama <- read.csv("tugas-sainsdata-23.csv", header = TRUE)
Data<- Data_Lama[,-1]
head(Data)
##   cabang jenis.kelamin usia pendidikan frekuensi.fashion nilai.fashion
## 1     11             2   38          2                 5        1.1588
## 2      2             2   33          2                 4        0.4964
## 3      7             1   41          3                 5        0.6008
## 4      2             1   43          3                 6        0.3612
## 5      3             2   37          4                 2        0.6572
## 6      4             2   39          3                 3        0.3894
##   frekuensi.footwear nilai.footwear frekuensi.lainnya nilai.lainnya
## 1                  1         0.4560                 4        0.9990
## 2                  1         0.9162                 4        0.2822
## 3                  1         0.0384                 4        0.6252
## 4                  3         0.5626                 6        0.4118
## 5                  3         0.1014                 1        0.6478
## 6                  2         0.2674                 3        0.5866
##   total.nilai.tunai lama.member promo
## 1              0.00          18     0
## 2              0.59          35     1
## 3              3.05          39     0
## 4              0.00           9     1
## 5              1.06          51     0
## 6              0.26          19     0

Data Eksploration

Ringkasan Statistik Deskriptif
summary(Data)
##      cabang       jenis.kelamin        usia         pendidikan   
##  Min.   : 1.000   Min.   :1.000   Min.   :26.00   Min.   :1.000  
##  1st Qu.: 4.000   1st Qu.:1.000   1st Qu.:37.00   1st Qu.:3.000  
##  Median : 8.000   Median :2.000   Median :40.00   Median :3.000  
##  Mean   : 7.609   Mean   :1.575   Mean   :40.13   Mean   :3.067  
##  3rd Qu.:11.000   3rd Qu.:2.000   3rd Qu.:44.00   3rd Qu.:4.000  
##  Max.   :14.000   Max.   :2.000   Max.   :56.00   Max.   :4.000  
##  frekuensi.fashion nilai.fashion    frekuensi.footwear nilai.footwear  
##  Min.   :0.000     Min.   :0.0296   Min.   :0.000      Min.   :0.0250  
##  1st Qu.:2.000     1st Qu.:0.3952   1st Qu.:2.000      1st Qu.:0.4107  
##  Median :3.000     Median :0.6768   Median :3.000      Median :0.6904  
##  Mean   :3.061     Mean   :0.7852   Mean   :3.073      Mean   :0.8451  
##  3rd Qu.:4.000     3rd Qu.:1.0470   3rd Qu.:4.000      3rd Qu.:1.1055  
##  Max.   :8.000     Max.   :2.6276   Max.   :8.000      Max.   :3.5494  
##  frekuensi.lainnya nilai.lainnya    total.nilai.tunai  lama.member   
##  Min.   :0.000     Min.   :0.0214   Min.   : 0.000    Min.   : 1.00  
##  1st Qu.:2.000     1st Qu.:0.4323   1st Qu.: 0.000    1st Qu.:14.00  
##  Median :3.000     Median :0.7311   Median : 0.675    Median :25.00  
##  Mean   :2.782     Mean   :0.8506   Mean   : 2.240    Mean   :25.85  
##  3rd Qu.:4.000     3rd Qu.:1.1118   3rd Qu.: 3.045    3rd Qu.:38.00  
##  Max.   :7.000     Max.   :3.0334   Max.   :23.020    Max.   :51.00  
##      promo       
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.3324  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
Struktur Data
str(Data)
## 'data.frame':    358 obs. of  13 variables:
##  $ cabang            : int  11 2 7 2 3 4 11 9 12 13 ...
##  $ jenis.kelamin     : int  2 2 1 1 2 2 2 1 1 1 ...
##  $ usia              : int  38 33 41 43 37 39 41 42 45 52 ...
##  $ pendidikan        : int  2 2 3 3 4 3 3 3 3 3 ...
##  $ frekuensi.fashion : int  5 4 5 6 2 3 4 5 4 2 ...
##  $ nilai.fashion     : num  1.159 0.496 0.601 0.361 0.657 ...
##  $ frekuensi.footwear: int  1 1 1 3 3 2 6 3 3 1 ...
##  $ nilai.footwear    : num  0.456 0.9162 0.0384 0.5626 0.1014 ...
##  $ frekuensi.lainnya : int  4 4 4 6 1 3 4 4 2 2 ...
##  $ nilai.lainnya     : num  0.999 0.282 0.625 0.412 0.648 ...
##  $ total.nilai.tunai : num  0 0.59 3.05 0 1.06 0.26 2.85 0 0.3 6.31 ...
##  $ lama.member       : int  18 35 39 9 51 19 21 27 5 37 ...
##  $ promo             : int  0 1 0 1 0 0 1 0 0 0 ...
Eksploarasi Data
plot_intro(Data,ggtheme = theme_classic())

Eksploarasi Variabel Respon
Data %>%
  count(promo) %>%
  mutate(percent = n * 100 / sum(n), label = str_c(round(percent, 2), "%")) %>%
  ggplot(aes(x = "", y = n, fill = as.factor(promo))) +
  geom_col() +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y") +
  theme_void()

Eksplorasi Secara Numerik
skim_without_charts(Data)
Data summary
Name Data
Number of rows 358
Number of columns 13
_______________________
Column type frequency:
numeric 13
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100
cabang 0 1 7.61 3.96 1.00 4.00 8.00 11.00 14.00
jenis.kelamin 0 1 1.58 0.49 1.00 1.00 2.00 2.00 2.00
usia 0 1 40.13 5.07 26.00 37.00 40.00 44.00 56.00
pendidikan 0 1 3.07 0.81 1.00 3.00 3.00 4.00 4.00
frekuensi.fashion 0 1 3.06 1.52 0.00 2.00 3.00 4.00 8.00
nilai.fashion 0 1 0.79 0.52 0.03 0.40 0.68 1.05 2.63
frekuensi.footwear 0 1 3.07 1.63 0.00 2.00 3.00 4.00 8.00
nilai.footwear 0 1 0.85 0.61 0.03 0.41 0.69 1.11 3.55
frekuensi.lainnya 0 1 2.78 1.49 0.00 2.00 3.00 4.00 7.00
nilai.lainnya 0 1 0.85 0.58 0.02 0.43 0.73 1.11 3.03
total.nilai.tunai 0 1 2.24 3.43 0.00 0.00 0.68 3.04 23.02
lama.member 0 1 25.85 14.41 1.00 14.00 25.00 38.00 51.00
promo 0 1 0.33 0.47 0.00 0.00 0.00 1.00 1.00
Eksplorasi Hubungan Prediktor Kategorik dengan Respon
# Plot hubungan antara Jenis Kelamin dan Promo Response
plot1 <- ggplot(Data, aes(x = factor(jenis.kelamin), fill = factor(promo))) +
  geom_bar(position = "dodge", stat = "count") +
  labs(x = "Jenis Kelamin", fill = "Promo Response") +
  theme_minimal() +
  theme(legend.position="bottom")

# Plot hubungan antara Pendidikan dan Promo Response
plot2 <- ggplot(Data, aes(x = factor(pendidikan), fill = factor(promo))) +
  geom_bar(position = "dodge", stat = "count") +
  labs(x = "Pendidikan", fill = "Promo Response") +
  theme_minimal() +
  theme(legend.position="bottom")

# Plot hubungan antara Cabang dan Promo Response
plot3 <- ggplot(Data, aes(x = factor(frekuensi.fashion), fill = factor(promo))) +
  geom_bar(position = "dodge", stat = "count") +
  labs(x = "Cabang", fill = "Promo Response") +
  theme_minimal() +
  theme(legend.position="bottom")

grid.arrange(plot1, plot2, plot3, ncol = 2)

Eksplorasi Hubungan Prediktor Kontinu dengan Respon
plot_boxplot(data = Data, by = "promo",
             ggtheme = theme_classic(),
             geom_boxplot_args = list(fill = "Blue"))

Data Pre-processing

Tahap praproses data terdiri dari:

Data cleaning (pembersihan data): adalah proses identifikasi, koreksi, dan penghapusan masalah atau ketidaksempurnaan dalam data agar dapat diolah dan dianalisis dengan akurat dengan menangani Missing Value, outlier, duplikasi data dan kesalahan input data.

Data Cleaning

Handling Outliers
# Mendeteksi outlier untuk setiap variabel numerik

# Menghitung IQR untuk setiap variabel numerik
iqr_values <- apply(Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                             "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                             "nilai.lainnya", "total.nilai.tunai", "lama.member")], 2, IQR)

# Menghitung batas atas dan batas bawah untuk mendeteksi outlier
lower_bound <- apply(Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                              "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                              "nilai.lainnya", "total.nilai.tunai", "lama.member")], 2, function(x) quantile(x, 0.25) - 1.5 * IQR(x))
upper_bound <- apply(Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                              "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                              "nilai.lainnya", "total.nilai.tunai", "lama.member")], 2, function(x) quantile(x, 0.75) + 1.5 * IQR(x))

# Mendeteksi outlier
outliers <- sapply(1:ncol(Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                                    "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                                    "nilai.lainnya", "total.nilai.tunai", "lama.member")]),
                   function(i) Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                                         "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                                         "nilai.lainnya", "total.nilai.tunai", "lama.member")][which(Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                                                                                                                  "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                                                                                                                  "nilai.lainnya", "total.nilai.tunai", "lama.member")][, i] < lower_bound[i] | Data[, c("usia", "frekuensi.fashion", "nilai.fashion", 
                                                                                                                                                                              "frekuensi.footwear", "nilai.footwear", "frekuensi.lainnya", 
                                                                                                                                                                              "nilai.lainnya", "total.nilai.tunai", "lama.member")][, i] > upper_bound[i]), i])

# Menampilkan outlier
print(outliers)
## [[1]]
## [1] 56 26
## 
## [[2]]
## [1] 8
## 
## [[3]]
##  [1] 2.2226 2.6276 2.0776 2.2716 2.2006 2.3006 2.1268 2.4374 2.3964 2.5078
## [11] 2.1186 2.3752 2.2716
## 
## [[4]]
## [1] 8 8
## 
## [[5]]
##  [1] 2.4224 2.8932 3.5494 2.4298 2.2678 2.2986 2.5292 2.2276 2.4346 3.0260
## [11] 2.7520 2.1770 2.5416 2.6252 2.7402 2.8930 2.1974
## 
## [[6]]
## integer(0)
## 
## [[7]]
##  [1] 2.8132 2.2904 2.5626 2.4766 2.5246 2.5776 3.0334 2.2050 2.5248 2.6500
## [11] 2.7596 2.1968 2.2588 2.9106 3.0118 2.5678
## 
## [[8]]
##  [1]  9.88  9.48 23.02  8.80  8.44  8.80 13.32  8.48  9.73  8.43 21.26  8.32
## [13] 15.53 22.95  9.92  9.80  9.42  9.53  8.69 10.32  9.78  8.92 11.40  8.61
## [25] 12.26  8.12  7.87
## 
## [[9]]
## integer(0)
# Menggantikan outlier dengan nilai batas atas atau batas bawah

# Gantilah dengan batas atas atau batas bawah sesuai kebutuhan
replace_outliers <- function(x, lower_bound, upper_bound) {
  x[x < lower_bound] <- lower_bound
  x[x > upper_bound] <- upper_bound
  return(x)
}

# Gantilah outlier untuk setiap variabel numerik
Data_clean <- Data

Data_clean$usia <- replace_outliers(Data_clean$usia, lower_bound[1], upper_bound[1])
Data_clean$frekuensi.fashion <- replace_outliers(Data_clean$frekuensi.fashion, lower_bound[2], upper_bound[2])
Data_clean$nilai.fashion <- replace_outliers(Data_clean$nilai.fashion, lower_bound[3], upper_bound[3])
Data_clean$frekuensi.footwear <- replace_outliers(Data_clean$frekuensi.footwear, lower_bound[4], upper_bound[4])
Data_clean$nilai.footwear <- replace_outliers(Data_clean$nilai.footwear, lower_bound[5], upper_bound[5])
Data_clean$frekuensi.lainnya <- replace_outliers(Data_clean$frekuensi.lainnya, lower_bound[6], upper_bound[6])
Data_clean$nilai.lainnya <- replace_outliers(Data_clean$nilai.lainnya, lower_bound[7], upper_bound[7])
Data_clean$total.nilai.tunai <- replace_outliers(Data_clean$total.nilai.tunai, lower_bound[8], upper_bound[8])
Data_clean$lama.member <- replace_outliers(Data_clean$lama.member, lower_bound[9], upper_bound[9])

# Tampilkan struktur data setelah mengatasi outlier
str(Data_clean)
## 'data.frame':    358 obs. of  13 variables:
##  $ cabang            : int  11 2 7 2 3 4 11 9 12 13 ...
##  $ jenis.kelamin     : int  2 2 1 1 2 2 2 1 1 1 ...
##  $ usia              : num  38 33 41 43 37 39 41 42 45 52 ...
##  $ pendidikan        : int  2 2 3 3 4 3 3 3 3 3 ...
##  $ frekuensi.fashion : num  5 4 5 6 2 3 4 5 4 2 ...
##  $ nilai.fashion     : num  1.159 0.496 0.601 0.361 0.657 ...
##  $ frekuensi.footwear: num  1 1 1 3 3 2 6 3 3 1 ...
##  $ nilai.footwear    : num  0.456 0.9162 0.0384 0.5626 0.1014 ...
##  $ frekuensi.lainnya : num  4 4 4 6 1 3 4 4 2 2 ...
##  $ nilai.lainnya     : num  0.999 0.282 0.625 0.412 0.648 ...
##  $ total.nilai.tunai : num  0 0.59 3.05 0 1.06 0.26 2.85 0 0.3 6.31 ...
##  $ lama.member       : num  18 35 39 9 51 19 21 27 5 37 ...
##  $ promo             : int  0 1 0 1 0 0 1 0 0 0 ...
Feature Scaling (Standarisasi)
# Pilih kolom-kolom numerik yang akan distandarisasi
numeric_columns <- sapply(Data_clean, is.numeric)
selected_data <- Data_clean[, numeric_columns]

# Standarisasi data
scaled_data <- scale(selected_data)

# Gantikan kolom numerik pada data awal dengan data yang sudah distandarisasi
Data_clean[, numeric_columns] <- scaled_data

# Tampilkan beberapa baris data setelah standarisasi
head(Data_clean)
##       cabang jenis.kelamin       usia  pendidikan frekuensi.fashion
## 1  0.8556472     0.8577896 -0.4209199 -1.31129051        1.28268728
## 2 -1.4152715     0.8577896 -1.4109944 -1.31129051        0.62196491
## 3 -0.1536500    -1.1625306  0.1731248 -0.08238474        1.28268728
## 4 -1.4152715    -1.1625306  0.5691545 -0.08238474        1.94340965
## 5 -1.1629472     0.8577896 -0.6189348  1.14652102       -0.69947983
## 6 -0.9106229     0.8577896 -0.2229050 -0.08238474       -0.03875746
##   nilai.fashion frekuensi.footwear nilai.footwear frekuensi.lainnya
## 1     0.7810533        -1.28179132     -0.6638244         0.8150925
## 2    -0.5673239        -1.28179132      0.1658464         0.8150925
## 3    -0.3548080        -1.28179132     -1.4166939         0.8150925
## 4    -0.8425362        -0.04157161     -0.4716408         2.1536389
## 5    -0.2400005        -0.04157161     -1.3031145        -1.1927271
## 6    -0.7851325        -0.66168146     -1.0038416         0.1458193
##   nilai.lainnya total.nilai.tunai lama.member      promo
## 1     0.3206062        -0.7744219  -0.5445594 -0.7046397
## 2    -1.0417954        -0.5428881   0.6352869  1.4152008
## 3    -0.3898650         0.4224899   0.9128978 -0.7046397
## 4    -0.7954683        -0.7744219  -1.1691839  1.4152008
## 5    -0.3469098        -0.3584460   1.7457305 -0.7046397
## 6    -0.4632309        -0.6723901  -0.4751566 -0.7046397
Handling Class Imbalance
# Memeriksa distribusi kelas
table(Data_clean$promo)
## 
## -0.704639727422298   1.41520079709184 
##                239                119
# Memeriksa persentase kelas
prop.table(table(Data_clean$promo)) * 100
## 
## -0.704639727422298   1.41520079709184 
##           66.75978           33.24022
# Visualisasi distribusi kelas
barplot(table(Data_clean$promo), col = c("blue", "red"), main = "Distribusi Kelas")

# Menangani Imbalanced Data dengan Oversampling
oversampled_data <- ROSE(promo ~ ., data = Data_clean, seed = 123, N = nrow(Data) / 2, p = 0.5)$data

# Setelah oversampling, memeriksa kembali distribusi kelas
table(oversampled_data$promo)
## 
## -0.704639727422298   1.41520079709184 
##                 94                 85
# Visualisasi distribusi kelas setelah penangan
barplot(table(oversampled_data$promo), col = c("blue", "red"), main = "Distribusi Kelas")

Model Training and Evaluation Menggunakan Support Vector Machine (SVM)

Tanpa Pre-processing Data

Memisahkan Data untuk Training dan Testing
set.seed(123)
Split_tp <- sample.split(Data, SplitRatio = 0.7)
Train_tp <- subset(Data, Split_tp==TRUE)
Test_tp <- subset(Data, Split_tp==FALSE)
Modeling Train Data (Default Parameter)
Train_tp$promo <- as.factor(Train_tp$promo)
Test_tp$promo <- as.factor(Test_tp$promo)
svm_model_tp <- svm(promo ~ ., data = Train_tp)
summary(svm_model_tp)
## 
## Call:
## svm(formula = promo ~ ., data = Train_tp)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  209
## 
##  ( 121 88 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1
Predict (Default Parameter)
p_train_tp <- predict(svm_model_tp)
p_train_tp <-confusionMatrix(p_train_tp, Train_tp$promo)
p_train_tp
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 157  44
##          1   3  44
##                                           
##                Accuracy : 0.8105          
##                  95% CI : (0.7561, 0.8573)
##     No Information Rate : 0.6452          
##     P-Value [Acc > NIR] : 8.525e-09       
##                                           
##                   Kappa : 0.5376          
##                                           
##  Mcnemar's Test P-Value : 5.392e-09       
##                                           
##             Sensitivity : 0.9812          
##             Specificity : 0.5000          
##          Pos Pred Value : 0.7811          
##          Neg Pred Value : 0.9362          
##              Prevalence : 0.6452          
##          Detection Rate : 0.6331          
##    Detection Prevalence : 0.8105          
##       Balanced Accuracy : 0.7406          
##                                           
##        'Positive' Class : 0               
## 

Dengan Pre-processing Data

Memisahkan Data untuk Training dan Testing (Split Data)
set.seed(123)
Split <- sample.split(oversampled_data, SplitRatio = 0.7)
Train <- subset(oversampled_data, Split==TRUE)
Test <- subset(oversampled_data, Split==FALSE)
Modeling Train Data (Default Parameter)
Train$promo <- as.factor(Train$promo)
Test$promo <- as.factor(Test$promo)
svm_model <- svm(promo ~ ., data = Train)
summary(svm_model)
## 
## Call:
## svm(formula = promo ~ ., data = Train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  115
## 
##  ( 59 56 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -0.704639727422298 1.41520079709184
Predict (Default Parameter)
p_train <- predict(svm_model)
p_train_cm<-confusionMatrix(p_train, Train$promo)
p_train_cm
## Confusion Matrix and Statistics
## 
##                     Reference
## Prediction           -0.704639727422298 1.41520079709184
##   -0.704639727422298                 64               11
##   1.41520079709184                    2               47
##                                             
##                Accuracy : 0.8952            
##                  95% CI : (0.8274, 0.943)   
##     No Information Rate : 0.5323            
##     P-Value [Acc > NIR] : <2e-16            
##                                             
##                   Kappa : 0.7874            
##                                             
##  Mcnemar's Test P-Value : 0.0265            
##                                             
##             Sensitivity : 0.9697            
##             Specificity : 0.8103            
##          Pos Pred Value : 0.8533            
##          Neg Pred Value : 0.9592            
##              Prevalence : 0.5323            
##          Detection Rate : 0.5161            
##    Detection Prevalence : 0.6048            
##       Balanced Accuracy : 0.8900            
##                                             
##        'Positive' Class : -0.704639727422298
## 
Kernel Linear
Train$promo <- as.factor(Train$promo)
Test$promo <- as.factor(Test$promo)
svm_model_kl <- svm(promo ~ ., data = Train, kernel = "linear")
summary(svm_model_kl)
## 
## Call:
## svm(formula = promo ~ ., data = Train, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
## 
## Number of Support Vectors:  88
## 
##  ( 44 44 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -0.704639727422298 1.41520079709184
p_train_kl <- predict(svm_model_kl)
p_train_kl <-confusionMatrix(p_train_kl, Train$promo)
p_train_kl
## Confusion Matrix and Statistics
## 
##                     Reference
## Prediction           -0.704639727422298 1.41520079709184
##   -0.704639727422298                 55               21
##   1.41520079709184                   11               37
##                                             
##                Accuracy : 0.7419            
##                  95% CI : (0.6557, 0.8163)  
##     No Information Rate : 0.5323            
##     P-Value [Acc > NIR] : 1.304e-06         
##                                             
##                   Kappa : 0.4762            
##                                             
##  Mcnemar's Test P-Value : 0.1116            
##                                             
##             Sensitivity : 0.8333            
##             Specificity : 0.6379            
##          Pos Pred Value : 0.7237            
##          Neg Pred Value : 0.7708            
##              Prevalence : 0.5323            
##          Detection Rate : 0.4435            
##    Detection Prevalence : 0.6129            
##       Balanced Accuracy : 0.7356            
##                                             
##        'Positive' Class : -0.704639727422298
## 
Kernel Polynomial
Train$promo <- as.factor(Train$promo)
Test$promo <- as.factor(Test$promo)
svm_model_p <- svm(promo ~ ., data = Train, kernel = "polynomial")
summary(svm_model_p)
## 
## Call:
## svm(formula = promo ~ ., data = Train, kernel = "polynomial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##      coef.0:  0 
## 
## Number of Support Vectors:  109
## 
##  ( 57 52 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -0.704639727422298 1.41520079709184
p_train_p <- predict(svm_model_p)
p_train_p <-confusionMatrix(p_train_p, Train$promo)
p_train_p
## Confusion Matrix and Statistics
## 
##                     Reference
## Prediction           -0.704639727422298 1.41520079709184
##   -0.704639727422298                 66               20
##   1.41520079709184                    0               38
##                                             
##                Accuracy : 0.8387            
##                  95% CI : (0.7619, 0.8986)  
##     No Information Rate : 0.5323            
##     P-Value [Acc > NIR] : 6.310e-13         
##                                             
##                   Kappa : 0.6692            
##                                             
##  Mcnemar's Test P-Value : 2.152e-05         
##                                             
##             Sensitivity : 1.0000            
##             Specificity : 0.6552            
##          Pos Pred Value : 0.7674            
##          Neg Pred Value : 1.0000            
##              Prevalence : 0.5323            
##          Detection Rate : 0.5323            
##    Detection Prevalence : 0.6935            
##       Balanced Accuracy : 0.8276            
##                                             
##        'Positive' Class : -0.704639727422298
## 
Kernel Sigmoid
Train$promo <- as.factor(Train$promo)
Test$promo <- as.factor(Test$promo)
svm_model_s <- svm(promo ~ ., data = Train, kernel = "sigmoid")
summary(svm_model_s)
## 
## Call:
## svm(formula = promo ~ ., data = Train, kernel = "sigmoid")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  sigmoid 
##        cost:  1 
##      coef.0:  0 
## 
## Number of Support Vectors:  96
## 
##  ( 48 48 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  -0.704639727422298 1.41520079709184
p_train_s <- predict(svm_model_s)
p_train_s <-confusionMatrix(p_train_s, Train$promo)
p_train_s
## Confusion Matrix and Statistics
## 
##                     Reference
## Prediction           -0.704639727422298 1.41520079709184
##   -0.704639727422298                 56               28
##   1.41520079709184                   10               30
##                                             
##                Accuracy : 0.6935            
##                  95% CI : (0.6044, 0.7732)  
##     No Information Rate : 0.5323            
##     P-Value [Acc > NIR] : 0.0001848         
##                                             
##                   Kappa : 0.3727            
##                                             
##  Mcnemar's Test P-Value : 0.0058198         
##                                             
##             Sensitivity : 0.8485            
##             Specificity : 0.5172            
##          Pos Pred Value : 0.6667            
##          Neg Pred Value : 0.7500            
##              Prevalence : 0.5323            
##          Detection Rate : 0.4516            
##    Detection Prevalence : 0.6774            
##       Balanced Accuracy : 0.6829            
##                                             
##        'Positive' Class : -0.704639727422298
## 

Tabel Evaulasi Model

# Extracting metrics from the confusion matrices
metrics <- data.frame(
  Model = c("SVM Tanpa Preprocessing", "SVM Kernel Radial", "SVM Kernel Linear", "SVM Kernel Polynomial", "SVM Kernel Sigmoid"),
  Accuracy = c(
    p_train_tp$overall["Accuracy"],
    p_train_cm$overall["Accuracy"],
    p_train_kl$overall["Accuracy"],
    p_train_p$overall["Accuracy"],
    p_train_s$overall["Accuracy"]
  ),
  Sensitivity = c(
    p_train_tp$byClass["Sensitivity"],
    p_train_cm$byClass["Sensitivity"],
    p_train_kl$byClass["Sensitivity"],
    p_train_p$byClass["Sensitivity"],
    p_train_s$byClass["Sensitivity"]
  ),
  Specificity = c(
    p_train_tp$byClass["Specificity"],
    p_train_cm$byClass["Specificity"],
    p_train_kl$byClass["Specificity"],
    p_train_p$byClass["Specificity"],
    p_train_s$byClass["Specificity"]
  ),
  Kappa = c(
    p_train_tp$overall["Kappa"],
    p_train_cm$overall["Kappa"],
    p_train_kl$overall["Kappa"],
    p_train_p$overall["Kappa"],
    p_train_s$overall["Kappa"]
  )
)

# Display the metrics data frame
print(metrics)
##                     Model  Accuracy Sensitivity Specificity     Kappa
## 1 SVM Tanpa Preprocessing 0.8104839   0.9812500   0.5000000 0.5376071
## 2       SVM Kernel Radial 0.8951613   0.9696970   0.8103448 0.7874473
## 3       SVM Kernel Linear 0.7419355   0.8333333   0.6379310 0.4762408
## 4   SVM Kernel Polynomial 0.8387097   1.0000000   0.6551724 0.6691569
## 5      SVM Kernel Sigmoid 0.6935484   0.8484848   0.5172414 0.3727370

Importance Variable

# Fit the SVM model using caret's train function
Model <- train(promo ~ ., data = Train, method = "svmRadial", trControl = trainControl(method = "cv"))

# Extract variable importance
svm.imp <- varImp(Model)

# Create a variable importance plot
plot(svm.imp)