Eksplorasi Data

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#training = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data Training.csv", sep = ";", stringsAsFactors = TRUE)
#testing = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data Testing.csv", sep = ";", stringsAsFactors = TRUE)
# Simpan data training ke folder tertentu
#write.csv(training, "Data_Training_Clean.csv", row.names = FALSE)

# Simpan data testing ke folder tertentu
#write.csv(testing, "Data_Testing_Clean.csv", row.names = FALSE)
# Input Data Training 
train = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data_Training_Clean.csv")

# Input Data Testing 
test = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data_Testing_Clean.csv")
str(train)
## 'data.frame':    41188 obs. of  21 variables:
##  $ age           : int  56 57 37 40 56 45 59 41 24 25 ...
##  $ job           : chr  "housemaid" "services" "services" "admin." ...
##  $ marital       : chr  "married" "married" "married" "married" ...
##  $ education     : chr  "basic.4y" "high.school" "high.school" "basic.6y" ...
##  $ default       : chr  "no" "" "no" "no" ...
##  $ housing       : chr  "no" "no" "yes" "no" ...
##  $ loan          : chr  "no" "no" "no" "no" ...
##  $ contact       : chr  "telephone" "telephone" "telephone" "telephone" ...
##  $ month         : chr  "may" "may" "may" "may" ...
##  $ day_of_week   : chr  "mon" "mon" "mon" "mon" ...
##  $ duration      : int  261 149 226 151 307 198 139 217 380 50 ...
##  $ campaign      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pdays         : int  999 999 999 999 999 999 999 999 999 999 ...
##  $ previous      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ poutcome      : chr  "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
##  $ emp.var.rate  : num  1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
##  $ cons.price.idx: num  94 94 94 94 94 ...
##  $ cons.conf.idx : num  -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
##  $ euribor3m     : num  4.86 4.86 4.86 4.86 4.86 ...
##  $ nr.employed   : num  5191 5191 5191 5191 5191 ...
##  $ y             : chr  "no" "no" "no" "no" ...
head(train)
##   age       job marital   education default housing loan   contact month
## 1  56 housemaid married    basic.4y      no      no   no telephone   may
## 2  57  services married high.school              no   no telephone   may
## 3  37  services married high.school      no     yes   no telephone   may
## 4  40    admin. married    basic.6y      no      no   no telephone   may
## 5  56  services married high.school      no      no  yes telephone   may
## 6  45  services married    basic.9y              no   no telephone   may
##   day_of_week duration campaign pdays previous    poutcome emp.var.rate
## 1         mon      261        1   999        0 nonexistent          1.1
## 2         mon      149        1   999        0 nonexistent          1.1
## 3         mon      226        1   999        0 nonexistent          1.1
## 4         mon      151        1   999        0 nonexistent          1.1
## 5         mon      307        1   999        0 nonexistent          1.1
## 6         mon      198        1   999        0 nonexistent          1.1
##   cons.price.idx cons.conf.idx euribor3m nr.employed  y
## 1         93.994         -36.4     4.857        5191 no
## 2         93.994         -36.4     4.857        5191 no
## 3         93.994         -36.4     4.857        5191 no
## 4         93.994         -36.4     4.857        5191 no
## 5         93.994         -36.4     4.857        5191 no
## 6         93.994         -36.4     4.857        5191 no
str(test)
## 'data.frame':    4119 obs. of  21 variables:
##  $ age           : int  30 39 25 38 47 32 32 41 31 35 ...
##  $ job           : chr  "blue-collar" "services" "services" "services" ...
##  $ marital       : chr  "married" "single" "married" "married" ...
##  $ education     : chr  "basic.9y" "high.school" "high.school" "basic.9y" ...
##  $ default       : chr  "no" "no" "no" "no" ...
##  $ housing       : chr  "yes" "no" "yes" "" ...
##  $ loan          : chr  "no" "no" "no" "" ...
##  $ contact       : chr  "cellular" "telephone" "telephone" "telephone" ...
##  $ month         : chr  "may" "may" "jun" "jun" ...
##  $ day_of_week   : chr  "fri" "fri" "wed" "fri" ...
##  $ duration      : int  487 346 227 17 58 128 290 44 68 170 ...
##  $ campaign      : int  2 4 1 3 1 3 4 2 1 1 ...
##  $ pdays         : int  999 999 999 999 999 999 999 999 999 999 ...
##  $ previous      : int  0 0 0 0 0 2 0 0 1 0 ...
##  $ poutcome      : chr  "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
##  $ emp.var.rate  : num  -1.8 1.1 1.4 1.4 -0.1 -1.1 -1.1 -0.1 -0.1 1.1 ...
##  $ cons.price.idx: num  92.9 94 94.5 94.5 93.2 ...
##  $ cons.conf.idx : num  -46.2 -36.4 -41.8 -41.8 -42 -37.5 -37.5 -42 -42 -36.4 ...
##  $ euribor3m     : num  1.31 4.86 4.96 4.96 4.19 ...
##  $ nr.employed   : num  5099 5191 5228 5228 5196 ...
##  $ y             : chr  "no" "no" "no" "no" ...
head(test)
##   age         job marital         education default housing loan   contact
## 1  30 blue-collar married          basic.9y      no     yes   no  cellular
## 2  39    services  single       high.school      no      no   no telephone
## 3  25    services married       high.school      no     yes   no telephone
## 4  38    services married          basic.9y      no              telephone
## 5  47      admin. married university.degree      no     yes   no  cellular
## 6  32    services  single university.degree      no      no   no  cellular
##   month day_of_week duration campaign pdays previous    poutcome emp.var.rate
## 1   may         fri      487        2   999        0 nonexistent         -1.8
## 2   may         fri      346        4   999        0 nonexistent          1.1
## 3   jun         wed      227        1   999        0 nonexistent          1.4
## 4   jun         fri       17        3   999        0 nonexistent          1.4
## 5   nov         mon       58        1   999        0 nonexistent         -0.1
## 6   sep         thu      128        3   999        2     failure         -1.1
##   cons.price.idx cons.conf.idx euribor3m nr.employed  y
## 1         92.893         -46.2     1.313      5099.1 no
## 2         93.994         -36.4     4.855      5191.0 no
## 3         94.465         -41.8     4.962      5228.1 no
## 4         94.465         -41.8     4.959      5228.1 no
## 5         93.200         -42.0     4.191      5195.8 no
## 6         94.199         -37.5     0.884      4963.6 no
nrow(train)
## [1] 41188
nrow(test)
## [1] 4119
ncol(train)
## [1] 21
ncol(test)
## [1] 21

Preprocessing Data

Hapus kolom duration

# Hapus kolom `duration`
train$duration = NULL 
test$duration = NULL
# Cek apakah masih ada kolom duration
"duration" %in% names(train)
## [1] FALSE
"duration" %in% names(test)
## [1] FALSE

Missing Value

# Definisikan NA
train = read.csv("D:/Semester 6/TPM/Kuliah/Tugas Individu/Data_Training_Clean.csv",
                 na.strings = c("", "NA", " ", "unknown")) 

test = read.csv("D:/Semester 6/TPM/Kuliah/Tugas Individu/Data_Testing_Clean.csv", 
                 na.strings = c("", "NA", " ", "unknown")) 

# Cek missing Value 
sum(is.na(train))
## [1] 12718
sum(is.na(test))
## [1] 1230

Ada 12.718 NA pada data train dan 1.230 NA pada data test. Data ini tidak bisa langsung dihapus mengingat risiko yang besar untuk kehilangan informasi penting. Seningga akan dilakukan imputasi dengan metode KNN (K-Nearest Neighbor).

# Melihat jumlah NA di setiap kolom pada data Training
na_train = colSums(is.na(train))
na_train[na_train > 0] # Hanya tampilkan kolom yang ada NA-nya
##       job   marital education   default   housing      loan 
##       330        80      1731      8597       990       990
# Melihat jumlah NA di setiap kolom pada data Testing
na_test = colSums(is.na(test))
na_test[na_test > 0]
##       job   marital education   default   housing      loan 
##        39        11       167       803       105       105

Imputasi KNN (K-Nearest Neighbors)

Data kosong diisi dengan pendekatan KNN, akan memprediksi nilai kosong berdasarkan amatan lain yang mirip secara karakter/profilnya atau secara kemiripan di peubah X yang lain.

# KNN (K-Nearest Neighbor)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
# Imputasi data Testing
train_imputed = kNN(train, k = 5) # k=5 -->  melihat 5 tetangga terdekat yang paling mirip

# Imputasi data Testing
test_imputed = kNN(test, k = 5)

# Hapus kolom tambahan yang dibuat oleh VIM  
train = train_imputed[, 1:20] # Sesuaikan dengan jumlah kolom asli kamu
test = test_imputed[, 1:20]

# Sekarang cek NA 
sum(is.na(train))
## [1] 0
# Menghapus hanya kolom yang berakhiran "_imp" hasil dari kNN
train = train_imputed[, !grepl("_imp", names(train_imputed))]
test = test_imputed[, !grepl("_imp", names(test_imputed))]

# Pastikan variabel y adalah faktor (penting untuk dummyVars)
train$y = as.factor(train$y)
test$y = as.factor(test$y)
names(train)
##  [1] "age"            "job"            "marital"        "education"     
##  [5] "default"        "housing"        "loan"           "contact"       
##  [9] "month"          "day_of_week"    "duration"       "campaign"      
## [13] "pdays"          "previous"       "poutcome"       "emp.var.rate"  
## [17] "cons.price.idx" "cons.conf.idx"  "euribor3m"      "nr.employed"   
## [21] "y"
"y" %in% names(train)
## [1] TRUE

One-Hot Encoding

Mengubah kategori menjadi numerik untuk bisa dibaca oleh ANN.

# Mengubah kolom kategori menjadi biner (1 atau 0) 
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Buat dummy utnuk peubah prediktor 
dmy = dummyVars(" ~ . -y", data = train) # dummy kecuali untuk Y 
train.x = data.frame(predict(dmy, newdata = train))
test.x = data.frame(predict(dmy, newdata = test))

# Masukkan Y kembali ke dalam data 
train.y = ifelse(train$y == "yes", 1, 0)
test.y = ifelse(test$y == "yes", 1, 0)

# Menggabungkan kembali data
train = cbind(train.x, y = train.y)
test = cbind(test.x, y = test.y)
# Cek kembali data 
head(train)
##   age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1  56         0              0               0            1             0
## 2  57         0              0               0            0             0
## 3  37         0              0               0            0             0
## 4  40         1              0               0            0             0
## 5  56         0              0               0            0             0
## 6  45         0              0               0            0             0
##   jobretired jobself.employed jobservices jobstudent jobtechnician
## 1          0                0           0          0             0
## 2          0                0           1          0             0
## 3          0                0           1          0             0
## 4          0                0           0          0             0
## 5          0                0           1          0             0
## 6          0                0           1          0             0
##   jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1             0               0              1             0                 1
## 2             0               0              1             0                 0
## 3             0               0              1             0                 0
## 4             0               0              1             0                 0
## 5             0               0              1             0                 0
## 6             0               0              1             0                 0
##   educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1                 0                 0                    0                   0
## 2                 0                 0                    1                   0
## 3                 0                 0                    1                   0
## 4                 1                 0                    0                   0
## 5                 0                 0                    1                   0
## 6                 0                 1                    0                   0
##   educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1                            0                          0         1          0
## 2                            0                          0         1          0
## 3                            0                          0         1          0
## 4                            0                          0         1          0
## 5                            0                          0         1          0
## 6                            0                          0         1          0
##   housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1         1          0      1       0               0                1        0
## 2         1          0      1       0               0                1        0
## 3         0          1      1       0               0                1        0
## 4         1          0      1       0               0                1        0
## 5         1          0      0       1               0                1        0
## 6         1          0      1       0               0                1        0
##   monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1        0        0        0        0        0        1        0        0
## 2        0        0        0        0        0        1        0        0
## 3        0        0        0        0        0        1        0        0
## 4        0        0        0        0        0        1        0        0
## 5        0        0        0        0        0        1        0        0
## 6        0        0        0        0        0        1        0        0
##   monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1        0              0              1              0              0
## 2        0              0              1              0              0
## 3        0              0              1              0              0
## 4        0              0              1              0              0
## 5        0              0              1              0              0
## 6        0              0              1              0              0
##   day_of_weekwed duration campaign pdays previous poutcomefailure
## 1              0      261        1   999        0               0
## 2              0      149        1   999        0               0
## 3              0      226        1   999        0               0
## 4              0      151        1   999        0               0
## 5              0      307        1   999        0               0
## 6              0      198        1   999        0               0
##   poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1                   1               0          1.1         93.994         -36.4
## 2                   1               0          1.1         93.994         -36.4
## 3                   1               0          1.1         93.994         -36.4
## 4                   1               0          1.1         93.994         -36.4
## 5                   1               0          1.1         93.994         -36.4
## 6                   1               0          1.1         93.994         -36.4
##   euribor3m nr.employed y
## 1     4.857        5191 0
## 2     4.857        5191 0
## 3     4.857        5191 0
## 4     4.857        5191 0
## 5     4.857        5191 0
## 6     4.857        5191 0
head(test)
##   age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1  30         0              1               0            0             0
## 2  39         0              0               0            0             0
## 3  25         0              0               0            0             0
## 4  38         0              0               0            0             0
## 5  47         1              0               0            0             0
## 6  32         0              0               0            0             0
##   jobretired jobself.employed jobservices jobstudent jobtechnician
## 1          0                0           0          0             0
## 2          0                0           1          0             0
## 3          0                0           1          0             0
## 4          0                0           1          0             0
## 5          0                0           0          0             0
## 6          0                0           1          0             0
##   jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1             0               0              1             0                 0
## 2             0               0              0             1                 0
## 3             0               0              1             0                 0
## 4             0               0              1             0                 0
## 5             0               0              1             0                 0
## 6             0               0              0             1                 0
##   educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1                 0                 1                    0                   0
## 2                 0                 0                    1                   0
## 3                 0                 0                    1                   0
## 4                 0                 1                    0                   0
## 5                 0                 0                    0                   0
## 6                 0                 0                    0                   0
##   educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1                            0                          0         1          0
## 2                            0                          0         1          0
## 3                            0                          0         1          0
## 4                            0                          0         1          0
## 5                            0                          1         1          0
## 6                            0                          1         1          0
##   housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1         0          1      1       0               1                0        0
## 2         1          0      1       0               0                1        0
## 3         0          1      1       0               0                1        0
## 4         0          1      1       0               0                1        0
## 5         0          1      1       0               1                0        0
## 6         1          0      1       0               1                0        0
##   monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1        0        0        0        0        0        1        0        0
## 2        0        0        0        0        0        1        0        0
## 3        0        0        0        1        0        0        0        0
## 4        0        0        0        1        0        0        0        0
## 5        0        0        0        0        0        0        1        0
## 6        0        0        0        0        0        0        0        0
##   monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1        0              1              0              0              0
## 2        0              1              0              0              0
## 3        0              0              0              0              0
## 4        0              1              0              0              0
## 5        0              0              1              0              0
## 6        1              0              0              1              0
##   day_of_weekwed duration campaign pdays previous poutcomefailure
## 1              0      487        2   999        0               0
## 2              0      346        4   999        0               0
## 3              1      227        1   999        0               0
## 4              0       17        3   999        0               0
## 5              0       58        1   999        0               0
## 6              0      128        3   999        2               1
##   poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1                   1               0         -1.8         92.893         -46.2
## 2                   1               0          1.1         93.994         -36.4
## 3                   1               0          1.4         94.465         -41.8
## 4                   1               0          1.4         94.465         -41.8
## 5                   1               0         -0.1         93.200         -42.0
## 6                   0               0         -1.1         94.199         -37.5
##   euribor3m nr.employed y
## 1     1.313      5099.1 0
## 2     4.855      5191.0 0
## 3     4.962      5228.1 0
## 4     4.959      5228.1 0
## 5     4.191      5195.8 0
## 6     0.884      4963.6 0

Normalisasi

Min-Max scalling untuk mengubah nilai dalam rentang 0 s.d 1, agar dalam penghitungan setiap variabelnya dalam skala yang sama.

# Normalisasi 
normalize = function(x){
  return((x - min(x)) / (max(x) - min(x)))
}

# Lakukan pada peubah X
train_norm = as.data.frame(lapply(train, normalize))
test_norm = as.data.frame(lapply(test, normalize))
summary(train_norm)
##       age           jobadmin.      jobblue.collar   jobentrepreneur  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.1852   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.2593   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.2842   Mean   :0.2541   Mean   :0.2285   Mean   :0.03545  
##  3rd Qu.:0.3704   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##   jobhousemaid     jobmanagement       jobretired     jobself.employed 
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.02586   Mean   :0.07128   Mean   :0.0422   Mean   :0.03455  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.0000   Max.   :1.00000  
##   jobservices        jobstudent      jobtechnician   jobunemployed    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.000   Median :0.00000  
##  Mean   :0.09699   Mean   :0.02132   Mean   :0.165   Mean   :0.02472  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.000   Max.   :1.00000  
##  maritaldivorced maritalmarried   maritalsingle    educationbasic.4y
##  Min.   :0.000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   
##  Median :0.000   Median :1.0000   Median :0.0000   Median :0.0000   
##  Mean   :0.112   Mean   :0.6068   Mean   :0.2812   Mean   :0.1082   
##  3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   
##  Max.   :1.000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   
##  educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
##  Min.   :0.00000   Min.   :0.0000    Min.   :0.0000       Min.   :0.000000   
##  1st Qu.:0.00000   1st Qu.:0.0000    1st Qu.:0.0000       1st Qu.:0.000000   
##  Median :0.00000   Median :0.0000    Median :0.0000       Median :0.000000   
##  Mean   :0.05776   Mean   :0.1546    Mean   :0.2411       Mean   :0.000437   
##  3rd Qu.:0.00000   3rd Qu.:0.0000    3rd Qu.:0.0000       3rd Qu.:0.000000   
##  Max.   :1.00000   Max.   :1.0000    Max.   :1.0000       Max.   :1.000000   
##  educationprofessional.course educationuniversity.degree   defaultno     
##  Min.   :0.0000               Min.   :0.0000             Min.   :0.0000  
##  1st Qu.:0.0000               1st Qu.:0.0000             1st Qu.:1.0000  
##  Median :0.0000               Median :0.0000             Median :1.0000  
##  Mean   :0.1312               Mean   :0.3067             Mean   :0.9999  
##  3rd Qu.:0.0000               3rd Qu.:1.0000             3rd Qu.:1.0000  
##  Max.   :1.0000               Max.   :1.0000             Max.   :1.0000  
##    defaultyes          housingno        housingyes         loanno      
##  Min.   :0.000e+00   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.000e+00   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.000e+00   Median :0.0000   Median :1.0000   Median :1.0000  
##  Mean   :7.284e-05   Mean   :0.4634   Mean   :0.5366   Mean   :0.8478  
##  3rd Qu.:0.000e+00   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.000e+00   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     loanyes       contactcellular  contacttelephone    monthapr     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1522   Mean   :0.6347   Mean   :0.3653   Mean   :0.0639  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     monthaug       monthdec           monthjul         monthjun     
##  Min.   :0.00   Min.   :0.000000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00   Median :0.000000   Median :0.0000   Median :0.0000  
##  Mean   :0.15   Mean   :0.004419   Mean   :0.1742   Mean   :0.1291  
##  3rd Qu.:0.00   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.00   Max.   :1.000000   Max.   :1.0000   Max.   :1.0000  
##     monthmar          monthmay         monthnov          monthoct      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.00000   Median :0.00000  
##  Mean   :0.01326   Mean   :0.3343   Mean   :0.09957   Mean   :0.01743  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##     monthsep       day_of_weekfri day_of_weekmon   day_of_weekthu  
##  Min.   :0.00000   Min.   :0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00   Median :0.0000   Median :0.0000  
##  Mean   :0.01384   Mean   :0.19   Mean   :0.2067   Mean   :0.2094  
##  3rd Qu.:0.00000   3rd Qu.:0.00   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.00   Max.   :1.0000   Max.   :1.0000  
##  day_of_weektue   day_of_weekwed      duration          campaign      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.02074   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.03660   Median :0.01818  
##  Mean   :0.1964   Mean   :0.1975   Mean   :0.05252   Mean   :0.02850  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.06486   3rd Qu.:0.03636  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000  
##      pdays           previous       poutcomefailure  poutcomenonexistent
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000     
##  1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.0000     
##  Median :1.0000   Median :0.00000   Median :0.0000   Median :1.0000     
##  Mean   :0.9634   Mean   :0.02471   Mean   :0.1032   Mean   :0.8634     
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000     
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000     
##  poutcomesuccess    emp.var.rate    cons.price.idx   cons.conf.idx   
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.3333   1st Qu.:0.3406   1st Qu.:0.3389  
##  Median :0.00000   Median :0.9375   Median :0.6033   Median :0.3766  
##  Mean   :0.03333   Mean   :0.7254   Mean   :0.5357   Mean   :0.4309  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.6988   3rd Qu.:0.6025  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    euribor3m       nr.employed           y         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.1610   1st Qu.:0.5123   1st Qu.:0.0000  
##  Median :0.9574   Median :0.8597   Median :0.0000  
##  Mean   :0.6772   Mean   :0.7691   Mean   :0.1127  
##  3rd Qu.:0.9810   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
summary(test_norm)
##       age           jobadmin.      jobblue.collar   jobentrepreneur  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.2857   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.3159   Mean   :0.2464   Mean   :0.2195   Mean   :0.03593  
##  3rd Qu.:0.4143   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##   jobhousemaid     jobmanagement       jobretired      jobself.employed
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.02695   Mean   :0.07939   Mean   :0.04054   Mean   :0.0386  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :1.0000  
##   jobservices        jobstudent      jobtechnician    jobunemployed    
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.00000   Median :0.0000   Median :0.00000  
##  Mean   :0.09663   Mean   :0.01991   Mean   :0.1692   Mean   :0.02695  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.0000   Max.   :1.00000  
##  maritaldivorced  maritalmarried   maritalsingle    educationbasic.4y
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000    
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000    
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.000    
##  Mean   :0.1083   Mean   :0.6113   Mean   :0.2804   Mean   :0.109    
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.000    
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000    
##  educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
##  Min.   :0.0000    Min.   :0.0000    Min.   :0.0000       Min.   :0.0000000  
##  1st Qu.:0.0000    1st Qu.:0.0000    1st Qu.:0.0000       1st Qu.:0.0000000  
##  Median :0.0000    Median :0.0000    Median :0.0000       Median :0.0000000  
##  Mean   :0.0573    Mean   :0.1466    Mean   :0.2314       Mean   :0.0002428  
##  3rd Qu.:0.0000    3rd Qu.:0.0000    3rd Qu.:0.0000       3rd Qu.:0.0000000  
##  Max.   :1.0000    Max.   :1.0000    Max.   :1.0000       Max.   :1.0000000  
##  educationprofessional.course educationuniversity.degree   defaultno     
##  Min.   :0.0000               Min.   :0.000              Min.   :0.0000  
##  1st Qu.:0.0000               1st Qu.:0.000              1st Qu.:1.0000  
##  Median :0.0000               Median :0.000              Median :1.0000  
##  Mean   :0.1345               Mean   :0.321              Mean   :0.9998  
##  3rd Qu.:0.0000               3rd Qu.:1.000              3rd Qu.:1.0000  
##  Max.   :1.0000               Max.   :1.000              Max.   :1.0000  
##    defaultyes          housingno        housingyes         loanno      
##  Min.   :0.0000000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.0000000   Median :0.0000   Median :1.0000   Median :1.0000  
##  Mean   :0.0002428   Mean   :0.4574   Mean   :0.5426   Mean   :0.8376  
##  3rd Qu.:0.0000000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     loanyes       contactcellular  contacttelephone    monthapr     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1624   Mean   :0.6438   Mean   :0.3562   Mean   :0.0522  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     monthaug         monthdec           monthjul         monthjun     
##  Min.   :0.0000   Min.   :0.000000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.000000   Median :0.0000   Median :0.0000  
##  Mean   :0.1544   Mean   :0.005341   Mean   :0.1726   Mean   :0.1287  
##  3rd Qu.:0.0000   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.000000   Max.   :1.0000   Max.   :1.0000  
##     monthmar          monthmay         monthnov         monthoct      
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.01165   Mean   :0.3345   Mean   :0.1083   Mean   :0.01675  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##     monthsep       day_of_weekfri   day_of_weekmon   day_of_weekthu  
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.01554   Mean   :0.1865   Mean   :0.2076   Mean   :0.2088  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  day_of_weektue   day_of_weekwed     duration          campaign      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.02827   1st Qu.:0.00000  
##  Median :0.0000   Median :0.000   Median :0.04968   Median :0.02941  
##  Mean   :0.2042   Mean   :0.193   Mean   :0.07049   Mean   :0.04521  
##  3rd Qu.:0.0000   3rd Qu.:0.000   3rd Qu.:0.08702   3rd Qu.:0.05882  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.00000   Max.   :1.00000  
##      pdays           previous       poutcomefailure  poutcomenonexistent
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000     
##  1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.0000     
##  Median :1.0000   Median :0.00000   Median :0.0000   Median :1.0000     
##  Mean   :0.9614   Mean   :0.03172   Mean   :0.1102   Mean   :0.8553     
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000     
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000     
##  poutcomesuccess    emp.var.rate    cons.price.idx   cons.conf.idx   
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.3333   1st Qu.:0.3406   1st Qu.:0.3389  
##  Median :0.00000   Median :0.9375   Median :0.6033   Median :0.3766  
##  Mean   :0.03447   Mean   :0.7260   Mean   :0.5373   Mean   :0.4310  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.6988   3rd Qu.:0.6025  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    euribor3m       nr.employed           y         
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.1585   1st Qu.:0.5123   1st Qu.:0.0000  
##  Median :0.9574   Median :0.8597   Median :0.0000  
##  Mean   :0.6772   Mean   :0.7670   Mean   :0.1095  
##  3rd Qu.:0.9810   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000

ANN (Artificial Neural Network)

Model ANN

Menggunakan package neuralnet dengan 5 neuron dipilih untuk menjaga model tetap stabil dengan kondisi data ini dan 1 layer dipilih karena dalam analisis ini dipakai rprop yang cukup stabil dan robust (* akan terbukti di hasil analisisnya nanti pada data ini).

library(neuralnet)
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
## 
##     compute
set.seed(333)
n = neuralnet(y ~ ., 
              data = train_norm,  # Pakai data yang sudah dinormalisasi
              hidden = 5, # 5 neuron dengan 1 hidden layer 
              err.fct = "ce", 
              linear.output = FALSE, 
              lifesign = 'full', # Akan memunculkan progress hitungan di console
              rep = 1, # Melakukan pengulangan 1x
              threshold = 0.5,
              algorithm = "rprop+", # Backpropagation
              stepmax = 1000000)  # Stepmax diperbesar agar model bisa bekerja lebih baik
## hidden: 5 thresh: 0.5 rep: 1/1 steps:
##    1000  min thresh: 4.51756192957293
##                                                   2000   min thresh: 2.6940431483678
##                                                   3000   min thresh: 2.04183640530384
##                                                   4000   min thresh: 1.53158637503712
##                                                   5000   min thresh: 1.22760204798536
##                                                   6000   min thresh: 1.07019060866538
##                                                   7000   min thresh: 1.02445317332697
##                                                   8000   min thresh: 1.01035960120116
##                                                   9000   min thresh: 1.01035960120116
##                                                  10000   min thresh: 1.01035960120116
##                                                  11000   min thresh: 1.01035960120116
##                                                  12000   min thresh: 1.01035960120116
##                                                  13000   min thresh: 1.01035960120116
##                                                  14000   min thresh: 1.01035960120116
##                                                  15000   min thresh: 1.01035960120116
##                                                  16000   min thresh: 1.01035960120116
##                                                  17000   min thresh: 1.01035960120116
##                                                  18000   min thresh: 1.01035960120116
##                                                  19000   min thresh: 1.01035960120116
##                                                  20000   min thresh: 1.01035960120116
##                                                  21000   min thresh: 1.00331206301678
##                                                  22000   min thresh: 0.950637281701579
##                                                  23000   min thresh: 0.869661429369372
##                                                  24000   min thresh: 0.830525055428064
##                                                  25000   min thresh: 0.737456086044859
##                                                  26000   min thresh: 0.737456086044859
##                                                  27000   min thresh: 0.648000240545284
##                                                  28000   min thresh: 0.648000240545284
##                                                  29000   min thresh: 0.629067224711656
##                                                  30000   min thresh: 0.614562138274845
##                                                  31000   min thresh: 0.576874195513038
##                                                  32000   min thresh: 0.576874195513038
##                                                  33000   min thresh: 0.547752033409284
##                                                  34000   min thresh: 0.540854212175597
##                                                  35000   min thresh: 0.535587507244335
##                                                  35357   error: 6974.57901   time: 1.08 hours

Model melakukan 22.331 kali iterasi backpropagation untuk mengevaluasi dan memperbaiki bobot. Error yang dihasilkan adalah 6927.83 (ce = Cross-Entropy), nilai ini merupakan akumulasi dari seluruh observasi data train sebanyak 41.188 amatan, nilai ini cukup wajar jika dilihat dari banyaknya data. Karena jika dirata-ratakan dengan data, maka rataan errornya adalah 6.927,83/41.188 = 0,168.

summary(n)
##                     Length  Class      Mode    
## call                     11 -none-     call    
## response              41188 -none-     numeric 
## covariate           2347716 -none-     numeric 
## model.list                2 -none-     list    
## err.fct                   1 -none-     function
## act.fct                   1 -none-     function
## linear.output             1 -none-     logical 
## data                     58 data.frame list    
## exclude                   0 -none-     NULL    
## net.result                1 -none-     list    
## weights                   1 -none-     list    
## generalized.weights       1 -none-     list    
## startweights              1 -none-     list    
## result.matrix           299 -none-     numeric
# Visualisasi Plot Neural Network
plot(n, rep = 1)

# Error 
n$result.matrix
##                                                   [,1]
## error                                     6.974579e+03
## reached.threshold                         4.846096e-01
## steps                                     3.535700e+04
## Intercept.to.1layhid1                    -3.943175e-02
## age.to.1layhid1                          -3.380416e-01
## jobadmin..to.1layhid1                    -8.537146e-01
## jobblue.collar.to.1layhid1               -5.787506e-01
## jobentrepreneur.to.1layhid1               5.122906e-02
## jobhousemaid.to.1layhid1                 -1.414206e+00
## jobmanagement.to.1layhid1                -1.147781e-01
## jobretired.to.1layhid1                   -1.825574e+00
## jobself.employed.to.1layhid1             -9.514744e-01
## jobservices.to.1layhid1                  -5.352231e-01
## jobstudent.to.1layhid1                   -1.267220e+00
## jobtechnician.to.1layhid1                -1.010304e+00
## jobunemployed.to.1layhid1                -1.280660e+00
## maritaldivorced.to.1layhid1              -3.348284e-01
## maritalmarried.to.1layhid1               -3.470448e-01
## maritalsingle.to.1layhid1                -4.468002e-01
## educationbasic.4y.to.1layhid1             3.652409e-01
## educationbasic.6y.to.1layhid1             2.898725e-01
## educationbasic.9y.to.1layhid1             4.590176e-01
## educationhigh.school.to.1layhid1          2.291217e-01
## educationilliterate.to.1layhid1          -1.091405e+00
## educationprofessional.course.to.1layhid1 -2.692769e-02
## educationuniversity.degree.to.1layhid1   -5.980826e-01
## defaultno.to.1layhid1                    -1.579376e-02
## defaultyes.to.1layhid1                    4.763746e+01
## housingno.to.1layhid1                     9.178156e-01
## housingyes.to.1layhid1                    6.431407e-01
## loanno.to.1layhid1                        2.282163e+00
## loanyes.to.1layhid1                       2.150783e+00
## contactcellular.to.1layhid1              -1.770463e+00
## contacttelephone.to.1layhid1             -2.532077e+00
## monthapr.to.1layhid1                      8.152909e+00
## monthaug.to.1layhid1                      1.265208e+00
## monthdec.to.1layhid1                      4.940613e+00
## monthjul.to.1layhid1                     -1.777568e+00
## monthjun.to.1layhid1                     -7.091628e+02
## monthmar.to.1layhid1                      1.211384e+00
## monthmay.to.1layhid1                      4.029653e-01
## monthnov.to.1layhid1                     -4.664547e+00
## monthoct.to.1layhid1                     -7.153076e+02
## monthsep.to.1layhid1                      2.670720e+00
## day_of_weekfri.to.1layhid1                1.282531e+00
## day_of_weekmon.to.1layhid1                1.170757e+00
## day_of_weekthu.to.1layhid1                6.745973e-01
## day_of_weektue.to.1layhid1                2.372688e-01
## day_of_weekwed.to.1layhid1                2.297081e-01
## duration.to.1layhid1                     -1.718646e+01
## campaign.to.1layhid1                      2.322730e+00
## pdays.to.1layhid1                         9.301920e-01
## previous.to.1layhid1                      1.552640e+00
## poutcomefailure.to.1layhid1               2.470593e+00
## poutcomenonexistent.to.1layhid1           1.926799e+00
## poutcomesuccess.to.1layhid1               1.984421e+00
## emp.var.rate.to.1layhid1                 -3.418270e+01
## cons.price.idx.to.1layhid1                1.348409e+01
## cons.conf.idx.to.1layhid1                -8.036387e+00
## euribor3m.to.1layhid1                     4.020901e+01
## nr.employed.to.1layhid1                  -1.976946e+01
## Intercept.to.1layhid2                    -5.369323e-01
## age.to.1layhid2                          -3.768306e-01
## jobadmin..to.1layhid2                    -8.018530e-01
## jobblue.collar.to.1layhid2               -7.259581e-01
## jobentrepreneur.to.1layhid2              -4.221833e-01
## jobhousemaid.to.1layhid2                 -6.964847e-01
## jobmanagement.to.1layhid2                -6.759772e-01
## jobretired.to.1layhid2                   -9.358644e-01
## jobself.employed.to.1layhid2             -7.831878e-01
## jobservices.to.1layhid2                  -9.095970e-01
## jobstudent.to.1layhid2                   -1.123828e+00
## jobtechnician.to.1layhid2                -8.650229e-01
## jobunemployed.to.1layhid2                -9.647325e-01
## maritaldivorced.to.1layhid2              -1.024932e+00
## maritalmarried.to.1layhid2               -9.655192e-01
## maritalsingle.to.1layhid2                -1.040302e+00
## educationbasic.4y.to.1layhid2            -2.063645e-01
## educationbasic.6y.to.1layhid2             1.350353e-01
## educationbasic.9y.to.1layhid2            -2.313404e-01
## educationhigh.school.to.1layhid2         -3.089856e-02
## educationilliterate.to.1layhid2          -1.242103e+00
## educationprofessional.course.to.1layhid2 -2.990263e-01
## educationuniversity.degree.to.1layhid2   -3.521732e-01
## defaultno.to.1layhid2                    -5.233563e-02
## defaultyes.to.1layhid2                    3.707040e+01
## housingno.to.1layhid2                    -8.958838e-01
## housingyes.to.1layhid2                   -9.591289e-01
## loanno.to.1layhid2                       -1.049520e-01
## loanyes.to.1layhid2                      -2.909156e-03
## contactcellular.to.1layhid2              -9.407004e-02
## contacttelephone.to.1layhid2             -6.652356e-01
## monthapr.to.1layhid2                     -7.080869e+02
## monthaug.to.1layhid2                     -9.220796e-01
## monthdec.to.1layhid2                     -7.095184e+02
## monthjul.to.1layhid2                     -9.155665e-01
## monthjun.to.1layhid2                     -5.018353e-01
## monthmar.to.1layhid2                     -1.128624e+00
## monthmay.to.1layhid2                     -2.473248e-01
## monthnov.to.1layhid2                     -3.944530e-03
## monthoct.to.1layhid2                     -7.085860e+02
## monthsep.to.1layhid2                     -1.534647e+00
## day_of_weekfri.to.1layhid2               -7.606146e-02
## day_of_weekmon.to.1layhid2                8.339708e-02
## day_of_weekthu.to.1layhid2                5.720414e-02
## day_of_weektue.to.1layhid2                1.521139e-01
## day_of_weekwed.to.1layhid2                1.098206e-01
## duration.to.1layhid2                     -2.236398e+01
## campaign.to.1layhid2                      1.372108e+00
## pdays.to.1layhid2                        -6.679716e-02
## previous.to.1layhid2                     -1.705194e+00
## poutcomefailure.to.1layhid2               2.026577e-01
## poutcomenonexistent.to.1layhid2          -2.186116e-01
## poutcomesuccess.to.1layhid2              -3.454041e-01
## emp.var.rate.to.1layhid2                  3.089305e+00
## cons.price.idx.to.1layhid2                8.762296e-01
## cons.conf.idx.to.1layhid2                 1.525068e+00
## euribor3m.to.1layhid2                    -4.482827e+00
## nr.employed.to.1layhid2                   5.739464e+00
## Intercept.to.1layhid3                    -6.078767e-01
## age.to.1layhid3                           6.491314e-01
## jobadmin..to.1layhid3                     3.881111e-03
## jobblue.collar.to.1layhid3                3.538709e-01
## jobentrepreneur.to.1layhid3              -3.382814e-02
## jobhousemaid.to.1layhid3                  4.971060e-01
## jobmanagement.to.1layhid3                -4.532082e-01
## jobretired.to.1layhid3                    2.263087e-01
## jobself.employed.to.1layhid3              6.290454e-02
## jobservices.to.1layhid3                  -2.219427e-01
## jobstudent.to.1layhid3                    5.021298e-01
## jobtechnician.to.1layhid3                 1.651778e-01
## jobunemployed.to.1layhid3                 2.148394e-01
## maritaldivorced.to.1layhid3              -1.458897e+00
## maritalmarried.to.1layhid3               -1.626457e+00
## maritalsingle.to.1layhid3                -1.456964e+00
## educationbasic.4y.to.1layhid3            -8.804237e-01
## educationbasic.6y.to.1layhid3            -6.056060e-01
## educationbasic.9y.to.1layhid3            -4.398461e-01
## educationhigh.school.to.1layhid3         -6.445914e-01
## educationilliterate.to.1layhid3          -7.122754e+02
## educationprofessional.course.to.1layhid3 -3.683024e-01
## educationuniversity.degree.to.1layhid3   -1.562856e-01
## defaultno.to.1layhid3                     1.693844e+00
## defaultyes.to.1layhid3                    4.365319e+01
## housingno.to.1layhid3                    -8.760821e-01
## housingyes.to.1layhid3                   -8.368634e-01
## loanno.to.1layhid3                        5.752606e-01
## loanyes.to.1layhid3                       4.435606e-01
## contactcellular.to.1layhid3               5.741348e-01
## contacttelephone.to.1layhid3              1.733312e+00
## monthapr.to.1layhid3                      7.207845e-01
## monthaug.to.1layhid3                      1.349846e+00
## monthdec.to.1layhid3                      2.360126e+00
## monthjul.to.1layhid3                      1.214665e+00
## monthjun.to.1layhid3                      1.188027e+00
## monthmar.to.1layhid3                      1.175181e+00
## monthmay.to.1layhid3                     -1.944711e-01
## monthnov.to.1layhid3                      3.761273e-01
## monthoct.to.1layhid3                      1.839257e+00
## monthsep.to.1layhid3                      1.544431e+00
## day_of_weekfri.to.1layhid3               -6.147338e-02
## day_of_weekmon.to.1layhid3                1.577726e-01
## day_of_weekthu.to.1layhid3                1.357085e-01
## day_of_weektue.to.1layhid3               -3.912529e-01
## day_of_weekwed.to.1layhid3               -4.310578e-02
## duration.to.1layhid3                     -7.800396e+01
## campaign.to.1layhid3                      6.034265e+00
## pdays.to.1layhid3                         2.928693e-01
## previous.to.1layhid3                      2.031599e-02
## poutcomefailure.to.1layhid3              -4.314237e-01
## poutcomenonexistent.to.1layhid3          -5.011482e-01
## poutcomesuccess.to.1layhid3              -1.028814e+00
## emp.var.rate.to.1layhid3                  6.970610e-01
## cons.price.idx.to.1layhid3                3.663608e-01
## cons.conf.idx.to.1layhid3                -6.334230e-01
## euribor3m.to.1layhid3                    -3.042397e+00
## nr.employed.to.1layhid3                  -8.080660e-01
## Intercept.to.1layhid4                     2.424285e+00
## age.to.1layhid4                          -2.493691e-01
## jobadmin..to.1layhid4                    -1.351393e-01
## jobblue.collar.to.1layhid4               -7.612504e-01
## jobentrepreneur.to.1layhid4               1.751436e+00
## jobhousemaid.to.1layhid4                 -1.917219e+00
## jobmanagement.to.1layhid4                -1.318798e-01
## jobretired.to.1layhid4                   -6.981192e-01
## jobself.employed.to.1layhid4             -1.301138e+00
## jobservices.to.1layhid4                   3.176494e-02
## jobstudent.to.1layhid4                   -4.157002e-01
## jobtechnician.to.1layhid4                -2.442118e-01
## jobunemployed.to.1layhid4                -1.431643e+00
## maritaldivorced.to.1layhid4              -2.503322e-01
## maritalmarried.to.1layhid4                3.279044e-02
## maritalsingle.to.1layhid4                -3.391472e-02
## educationbasic.4y.to.1layhid4             4.212548e-01
## educationbasic.6y.to.1layhid4             3.636992e+00
## educationbasic.9y.to.1layhid4             8.225709e-01
## educationhigh.school.to.1layhid4          6.971041e-01
## educationilliterate.to.1layhid4          -4.688641e+01
## educationprofessional.course.to.1layhid4  5.656821e-01
## educationuniversity.degree.to.1layhid4    6.484980e-01
## defaultno.to.1layhid4                     2.881661e+00
## defaultyes.to.1layhid4                   -7.901246e+02
## housingno.to.1layhid4                     7.699659e-01
## housingyes.to.1layhid4                    4.208832e-01
## loanno.to.1layhid4                        4.715331e-01
## loanyes.to.1layhid4                       1.696820e+00
## contactcellular.to.1layhid4               1.387755e+00
## contacttelephone.to.1layhid4             -8.376486e-01
## monthapr.to.1layhid4                      4.430051e+01
## monthaug.to.1layhid4                      3.698935e+01
## monthdec.to.1layhid4                      6.781209e+00
## monthjul.to.1layhid4                      1.132885e+00
## monthjun.to.1layhid4                     -2.189785e+00
## monthmar.to.1layhid4                      2.514806e+01
## monthmay.to.1layhid4                     -1.375355e+00
## monthnov.to.1layhid4                      8.963554e+01
## monthoct.to.1layhid4                      9.039989e+00
## monthsep.to.1layhid4                     -2.099866e+00
## day_of_weekfri.to.1layhid4                3.971958e-01
## day_of_weekmon.to.1layhid4                9.094016e-01
## day_of_weekthu.to.1layhid4               -8.500766e-01
## day_of_weektue.to.1layhid4               -2.764588e-02
## day_of_weekwed.to.1layhid4                3.729230e-01
## duration.to.1layhid4                      2.210497e+01
## campaign.to.1layhid4                      7.326520e+00
## pdays.to.1layhid4                        -1.630761e+01
## previous.to.1layhid4                     -9.858499e+01
## poutcomefailure.to.1layhid4               1.417611e+01
## poutcomenonexistent.to.1layhid4           3.544579e-01
## poutcomesuccess.to.1layhid4               7.931191e+01
## emp.var.rate.to.1layhid4                 -1.589710e+00
## cons.price.idx.to.1layhid4                2.319144e+01
## cons.conf.idx.to.1layhid4                 1.074224e+01
## euribor3m.to.1layhid4                    -1.959874e+01
## nr.employed.to.1layhid4                   6.535267e+00
## Intercept.to.1layhid5                    -1.454103e+00
## age.to.1layhid5                           1.181234e+02
## jobadmin..to.1layhid5                     1.023500e+00
## jobblue.collar.to.1layhid5               -3.077073e+01
## jobentrepreneur.to.1layhid5              -4.986039e+01
## jobhousemaid.to.1layhid5                 -8.542154e+01
## jobmanagement.to.1layhid5                -2.663899e+01
## jobretired.to.1layhid5                   -4.273841e+01
## jobself.employed.to.1layhid5              6.261677e+00
## jobservices.to.1layhid5                   4.192692e+01
## jobstudent.to.1layhid5                   -1.288770e+01
## jobtechnician.to.1layhid5                 1.258402e+01
## jobunemployed.to.1layhid5                 3.086085e+00
## maritaldivorced.to.1layhid5              -5.577489e+00
## maritalmarried.to.1layhid5               -3.732894e+00
## maritalsingle.to.1layhid5                -1.333950e-02
## educationbasic.4y.to.1layhid5            -4.629823e+00
## educationbasic.6y.to.1layhid5            -1.470269e+01
## educationbasic.9y.to.1layhid5             1.215317e+01
## educationhigh.school.to.1layhid5         -4.019384e+01
## educationilliterate.to.1layhid5          -1.615688e+02
## educationprofessional.course.to.1layhid5 -1.474576e+01
## educationuniversity.degree.to.1layhid5    5.864750e-01
## defaultno.to.1layhid5                    -2.432976e-01
## defaultyes.to.1layhid5                    1.579997e+02
## housingno.to.1layhid5                    -6.372306e+00
## housingyes.to.1layhid5                    7.497248e-01
## loanno.to.1layhid5                        4.565376e-01
## loanyes.to.1layhid5                       2.516680e+01
## contactcellular.to.1layhid5              -1.111856e+01
## contacttelephone.to.1layhid5              3.210341e+01
## monthapr.to.1layhid5                      4.448766e-03
## monthaug.to.1layhid5                     -7.335852e+01
## monthdec.to.1layhid5                     -8.244310e+01
## monthjul.to.1layhid5                     -1.754346e+01
## monthjun.to.1layhid5                     -1.363544e+01
## monthmar.to.1layhid5                     -1.390232e+03
## monthmay.to.1layhid5                      4.673555e+01
## monthnov.to.1layhid5                     -3.607367e+00
## monthoct.to.1layhid5                      3.065357e+02
## monthsep.to.1layhid5                      2.099520e+01
## day_of_weekfri.to.1layhid5                3.612040e+00
## day_of_weekmon.to.1layhid5                8.958457e-02
## day_of_weekthu.to.1layhid5               -1.180417e+01
## day_of_weektue.to.1layhid5               -7.610428e-02
## day_of_weekwed.to.1layhid5               -1.614685e+01
## duration.to.1layhid5                      1.095100e+03
## campaign.to.1layhid5                     -1.481037e+02
## pdays.to.1layhid5                         2.763216e-01
## previous.to.1layhid5                     -4.454451e+02
## poutcomefailure.to.1layhid5               5.094644e+01
## poutcomenonexistent.to.1layhid5           1.474048e+00
## poutcomesuccess.to.1layhid5              -8.198139e+00
## emp.var.rate.to.1layhid5                  7.904390e+00
## cons.price.idx.to.1layhid5               -2.241584e+02
## cons.conf.idx.to.1layhid5                -2.740269e+00
## euribor3m.to.1layhid5                     4.490410e+00
## nr.employed.to.1layhid5                   1.289901e+01
## Intercept.to.y                           -4.955647e-01
## 1layhid1.to.y                            -6.790349e+00
## 1layhid2.to.y                            -1.825784e+01
## 1layhid3.to.y                            -9.050872e+00
## 1layhid4.to.y                             2.474229e+00
## 1layhid5.to.y                            -1.250951e+00

Output layer pada hasil ANN data train ini adalah ada sebanyak 58 neuron di sebelah kiri (sesuai dengan jumlah peubah X) dengan target adalah Y di sebelah kanan sebanyak 1 neuron.

Nilai biasnya adalah:

Intercept.to.1layhid1                     1.444866e-01

Sedangkan variabel 1layhid adalah bobotnya dengan bobot terbesar adalah:

1layhid1.to.y                            -5.575475e+00

Yang artinya ini adalah kontributor terbesar. Kemudian nilai errornya adalah:

error                                     6.927831e+03

Dan Steps yang dicapai adalah:

steps                                     2.233100e+04

Prediksi

# Prediksi rating Neural Network Model 
output = compute(n, rep = 1, train_norm[, -1])
head(output$net.result)
##              [,1]
## [1,] 9.456773e-16
## [2,] 9.456773e-16
## [3,] 2.211753e-04
## [4,] 9.456773e-16
## [5,] 9.456773e-16
## [6,] 9.456773e-16
head(train_norm[1, ])
##         age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 0.4814815         0              0               0            1             0
##   jobretired jobself.employed jobservices jobstudent jobtechnician
## 1          0                0           0          0             0
##   jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1             0               0              1             0                 1
##   educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1                 0                 0                    0                   0
##   educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1                            0                          0         1          0
##   housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1         1          0      1       0               0                1        0
##   monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1        0        0        0        0        0        1        0        0
##   monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1        0              0              1              0              0
##   day_of_weekwed   duration campaign pdays previous poutcomefailure
## 1              0 0.05307035        0     1        0               0
##   poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1                   1               0       0.9375      0.6987529     0.6025105
##   euribor3m nr.employed y
## 1 0.9573793   0.8597353 0
head(test_norm[1, ])
##         age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 0.1714286         0              1               0            0             0
##   jobretired jobself.employed jobservices jobstudent jobtechnician
## 1          0                0           0          0             0
##   jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1             0               0              1             0                 0
##   educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1                 0                 1                    0                   0
##   educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1                            0                          0         1          0
##   housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1         0          1      1       0               1                0        0
##   monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1        0        0        0        0        0        1        0        0
##   monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1        0              1              0              0              0
##   day_of_weekwed duration   campaign pdays previous poutcomefailure
## 1              0 0.133681 0.02941176     1        0               0
##   poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1                   1               0    0.3333333      0.2696804     0.1924686
##   euribor3m nr.employed y
## 1 0.1537415   0.5122873 0

Data Training

# Cari peluang untuk data train
prob_train = compute(n, train_norm[, -which(names(train_norm) == "y")])$net.result

pred_y_train_f = ifelse(prob_train > 0.5, "Yes", "No") # menghasilkan biner 0 atau 1
pred_y_train_f = factor(pred_y_train_f, levels = c("No", "Yes"))

# Data Asil
aktual_y_train_f = factor(train$y, levels = c(0, 1), labels = c("No", "Yes"))

Data Testing

# Hitung eluang untuk data test
prob_test = compute(n, test_norm[, -which(names(test_norm) == "y")])$net.result

# Ubah menjadi factor
pred_y_test_f = ifelse(prob_test > 0.5, "Yes", "No")
pred_y_test_f = factor(pred_y_test_f, levels = c(0, 1), labels = c("No", "Yes"))

# Data asli
aktual_y_test_f = factor(test_norm$y, levels = c(0, 1), labels = c("No", "Yes"))

Cek Akurasi

Data Trainning

summary.train = confusionMatrix(pred_y_train_f, aktual_y_train_f, positive = "Yes", mode = "everything")

# Tampilkan Statistik Lengkap Training
cat("--- STATISTICS DATA TRAINING ---\n")
## --- STATISTICS DATA TRAINING ---
print(summary.train)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    No   Yes
##        No  35113  1870
##        Yes  1435  2770
##                                           
##                Accuracy : 0.9198          
##                  95% CI : (0.9171, 0.9224)
##     No Information Rate : 0.8873          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5815          
##                                           
##  Mcnemar's Test P-Value : 4.378e-14       
##                                           
##             Sensitivity : 0.59698         
##             Specificity : 0.96074         
##          Pos Pred Value : 0.65874         
##          Neg Pred Value : 0.94944         
##               Precision : 0.65874         
##                  Recall : 0.59698         
##                      F1 : 0.62634         
##              Prevalence : 0.11265         
##          Detection Rate : 0.06725         
##    Detection Prevalence : 0.10209         
##       Balanced Accuracy : 0.77886         
##                                           
##        'Positive' Class : Yes             
## 
Accuracy : 0.8902 (overall corectness)
Sensitivity : 0.034483 (finding positives) 
Precision : 0.796020
F1 : 0.066102
  • Akurasi (0.89%): secara keseluruhan model berhasil menebak klasifikasi nasabah yang mau atau tidak berlangganan.

  • Precision (0.79%): Keakuratan tebakan model benar terhadap prediksinya cukup akurat dengan nilai 0.79% cukup tinggi.

  • Sensitivity/Recall (0.03%): Dari seluruh nasabah yang benar-benar mau berlangganan ditebak dengan benar oleh model sebesar 0.03%, nilai ini cukup kecil.

  • F-1 Score (0.06%): Nilai rataan harmonik antara precision dan recall, nilai ini menunjukkan bahwa 0.06% adalah sebagai performa model yang artinya model masih sangat kesulitan untuk menyeimbangkan antara False Positive dan False Negative.

Data Testing

# Tampilkan Statistik Lengkap Testing 
cat("--- SUMMARY STATISTICS: DATA TESTING ---\n")
## --- SUMMARY STATISTICS: DATA TESTING ---
summary.test = confusionMatrix(pred_y_test_f, aktual_y_test_f, 
                                positive = "Yes", 
                                mode = "everything")
print(summary.test)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No   0   0
##        Yes  0   0
##                                   
##                Accuracy : NaN     
##                  95% CI : (NA, NA)
##     No Information Rate : NA      
##     P-Value [Acc > NIR] : NA      
##                                   
##                   Kappa : NaN     
##                                   
##  Mcnemar's Test P-Value : NA      
##                                   
##             Sensitivity :  NA     
##             Specificity :  NA     
##          Pos Pred Value :  NA     
##          Neg Pred Value :  NA     
##               Precision :  NA     
##                  Recall :  NA     
##                      F1 :  NA     
##              Prevalence : NaN     
##          Detection Rate : NaN     
##    Detection Prevalence : NaN     
##       Balanced Accuracy :  NA     
##                                   
##        'Positive' Class : Yes     
## 
Accuracy : 0.9051 (overall corectness)
Precision : 0.54839 
Sensitivity : 0.75388 (finding positives)        
Specificity : 0.92366 (finding negatives)
F1 : 0.63492
  • Akurasi (0.90%): Secara keseluruhan, model menebak data test cukup baik dengan nilai akurasi yang cukup tinggi.

  • Precision (0.54%): Keakuratan tebakan model benar terhadap prediksinya cukup akurat dengan nilai 0.54% cukup tinggi.

  • Recall/Sensitivity (0.75%): Dari seluruh nasabah yang benar-benar mau berlangganan ditebak dengan benar oleh model sebesar 0.75%, nilai ini cukup kecil.

  • F-1 Score (0.63%): Nilai rataan harmonik antara precision dan recall, nilai ini menunjukkan bahwa 0.63% adalah sebagai performa model yang artinya model sudah cukup baik untuk menyeimbangkan antara False Positive dan False Negative.

# Ambil nilai akurasi dari summary
accuracy = summary.train$overall['Accuracy']
accuracy_test = summary.test$overall['Accuracy']

# Membuat data frame untuk grafik
performa = data.frame(
  Dataset = c("Training", "Testing"),
  Akurasi = c(accuracy, accuracy_test)
)
# Visualisasi
barplot(performa$Akurasi, 
        names.arg = performa$Dataset, 
        ylim = c(0, 1), col = c("skyblue", "orange"),
        main = "Perbandingan Akurasi Model",
        ylab = "Nilai Akurasi")
abline(h = 0.9, col = "red", lty = 2) # Garis batas 90%

Berdasarkan nilai akurasi dari nilai metrik menunjukkan bahwa model masih kurang baik dalam menebak data train sedangkan model sudah cukup baik dalam menebak data test. Hal ini dibuktikan dengan nilai akurasi pada confussion matri yang menunjukkan bahwa nilai metrik dalam data test lebih masuk akal.

Maksudnya, nilai pada data test ini selaras, ketika akurasi, recall/sensitivity, dan F-1 Score itu semua tinggi. Sedangkan pada data train, hanya akurasi dan presisi saja yang tinggi tapi nilai F1-Score dan sensitivity sangat rendah. Artinya pada data train model masih sulit untuk menebak target.

Hasil

# Ambil hasil prediksi (dalam bentuk peluang)
prob_test = compute(n, test_norm[, -which(names(test_norm) == "y")])$net.result

# Ubah peluang menjadi label "Yes" atau "No" (Threshold 0.5)
prediksi_label = ifelse(prob_test > 0.5, "Yes", "No")

# abungkan dengan data test asli (sebelum normalisasi agar mudah dibaca)
hasil_akhir = data.frame(test, Prediksi_Model = prediksi_label)
# Jika ingin membuka jendela data khusus dua kolom ini
View(hasil_akhir[, c("age", "Prediksi_Model")])
# Jumlah dan persentase Yes dan No
library(dplyr)
hasil_akhir %>%
  group_by(Prediksi_Model) %>%
  summarise(Jumlah = n(),
            Persentase = (n() / nrow(hasil_akhir)) * 100)
## # A tibble: 2 × 3
##   Prediksi_Model Jumlah Persentase
##   <chr>           <int>      <dbl>
## 1 No               3501       85.0
## 2 Yes               618       15.0
# Simpan hasil prediksi
write.csv(hasil_akhir, "Hasil_Prediksi_Nasabah.csv", row.names = FALSE)