library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#training = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data Training.csv", sep = ";", stringsAsFactors = TRUE)
#testing = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data Testing.csv", sep = ";", stringsAsFactors = TRUE)
# Simpan data training ke folder tertentu
#write.csv(training, "Data_Training_Clean.csv", row.names = FALSE)
# Simpan data testing ke folder tertentu
#write.csv(testing, "Data_Testing_Clean.csv", row.names = FALSE)
# Input Data Training
train = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data_Training_Clean.csv")
# Input Data Testing
test = read.csv("D:\\Semester 6\\TPM\\Kuliah\\Tugas Individu\\Data_Testing_Clean.csv")
str(train)
## 'data.frame': 41188 obs. of 21 variables:
## $ age : int 56 57 37 40 56 45 59 41 24 25 ...
## $ job : chr "housemaid" "services" "services" "admin." ...
## $ marital : chr "married" "married" "married" "married" ...
## $ education : chr "basic.4y" "high.school" "high.school" "basic.6y" ...
## $ default : chr "no" "" "no" "no" ...
## $ housing : chr "no" "no" "yes" "no" ...
## $ loan : chr "no" "no" "no" "no" ...
## $ contact : chr "telephone" "telephone" "telephone" "telephone" ...
## $ month : chr "may" "may" "may" "may" ...
## $ day_of_week : chr "mon" "mon" "mon" "mon" ...
## $ duration : int 261 149 226 151 307 198 139 217 380 50 ...
## $ campaign : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : int 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : int 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcome : chr "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
## $ emp.var.rate : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ cons.price.idx: num 94 94 94 94 94 ...
## $ cons.conf.idx : num -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ euribor3m : num 4.86 4.86 4.86 4.86 4.86 ...
## $ nr.employed : num 5191 5191 5191 5191 5191 ...
## $ y : chr "no" "no" "no" "no" ...
head(train)
## age job marital education default housing loan contact month
## 1 56 housemaid married basic.4y no no no telephone may
## 2 57 services married high.school no no telephone may
## 3 37 services married high.school no yes no telephone may
## 4 40 admin. married basic.6y no no no telephone may
## 5 56 services married high.school no no yes telephone may
## 6 45 services married basic.9y no no telephone may
## day_of_week duration campaign pdays previous poutcome emp.var.rate
## 1 mon 261 1 999 0 nonexistent 1.1
## 2 mon 149 1 999 0 nonexistent 1.1
## 3 mon 226 1 999 0 nonexistent 1.1
## 4 mon 151 1 999 0 nonexistent 1.1
## 5 mon 307 1 999 0 nonexistent 1.1
## 6 mon 198 1 999 0 nonexistent 1.1
## cons.price.idx cons.conf.idx euribor3m nr.employed y
## 1 93.994 -36.4 4.857 5191 no
## 2 93.994 -36.4 4.857 5191 no
## 3 93.994 -36.4 4.857 5191 no
## 4 93.994 -36.4 4.857 5191 no
## 5 93.994 -36.4 4.857 5191 no
## 6 93.994 -36.4 4.857 5191 no
str(test)
## 'data.frame': 4119 obs. of 21 variables:
## $ age : int 30 39 25 38 47 32 32 41 31 35 ...
## $ job : chr "blue-collar" "services" "services" "services" ...
## $ marital : chr "married" "single" "married" "married" ...
## $ education : chr "basic.9y" "high.school" "high.school" "basic.9y" ...
## $ default : chr "no" "no" "no" "no" ...
## $ housing : chr "yes" "no" "yes" "" ...
## $ loan : chr "no" "no" "no" "" ...
## $ contact : chr "cellular" "telephone" "telephone" "telephone" ...
## $ month : chr "may" "may" "jun" "jun" ...
## $ day_of_week : chr "fri" "fri" "wed" "fri" ...
## $ duration : int 487 346 227 17 58 128 290 44 68 170 ...
## $ campaign : int 2 4 1 3 1 3 4 2 1 1 ...
## $ pdays : int 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : int 0 0 0 0 0 2 0 0 1 0 ...
## $ poutcome : chr "nonexistent" "nonexistent" "nonexistent" "nonexistent" ...
## $ emp.var.rate : num -1.8 1.1 1.4 1.4 -0.1 -1.1 -1.1 -0.1 -0.1 1.1 ...
## $ cons.price.idx: num 92.9 94 94.5 94.5 93.2 ...
## $ cons.conf.idx : num -46.2 -36.4 -41.8 -41.8 -42 -37.5 -37.5 -42 -42 -36.4 ...
## $ euribor3m : num 1.31 4.86 4.96 4.96 4.19 ...
## $ nr.employed : num 5099 5191 5228 5228 5196 ...
## $ y : chr "no" "no" "no" "no" ...
head(test)
## age job marital education default housing loan contact
## 1 30 blue-collar married basic.9y no yes no cellular
## 2 39 services single high.school no no no telephone
## 3 25 services married high.school no yes no telephone
## 4 38 services married basic.9y no telephone
## 5 47 admin. married university.degree no yes no cellular
## 6 32 services single university.degree no no no cellular
## month day_of_week duration campaign pdays previous poutcome emp.var.rate
## 1 may fri 487 2 999 0 nonexistent -1.8
## 2 may fri 346 4 999 0 nonexistent 1.1
## 3 jun wed 227 1 999 0 nonexistent 1.4
## 4 jun fri 17 3 999 0 nonexistent 1.4
## 5 nov mon 58 1 999 0 nonexistent -0.1
## 6 sep thu 128 3 999 2 failure -1.1
## cons.price.idx cons.conf.idx euribor3m nr.employed y
## 1 92.893 -46.2 1.313 5099.1 no
## 2 93.994 -36.4 4.855 5191.0 no
## 3 94.465 -41.8 4.962 5228.1 no
## 4 94.465 -41.8 4.959 5228.1 no
## 5 93.200 -42.0 4.191 5195.8 no
## 6 94.199 -37.5 0.884 4963.6 no
nrow(train)
## [1] 41188
nrow(test)
## [1] 4119
ncol(train)
## [1] 21
ncol(test)
## [1] 21
duration# Hapus kolom `duration`
train$duration = NULL
test$duration = NULL
# Cek apakah masih ada kolom duration
"duration" %in% names(train)
## [1] FALSE
"duration" %in% names(test)
## [1] FALSE
# Definisikan NA
train = read.csv("D:/Semester 6/TPM/Kuliah/Tugas Individu/Data_Training_Clean.csv",
na.strings = c("", "NA", " ", "unknown"))
test = read.csv("D:/Semester 6/TPM/Kuliah/Tugas Individu/Data_Testing_Clean.csv",
na.strings = c("", "NA", " ", "unknown"))
# Cek missing Value
sum(is.na(train))
## [1] 12718
sum(is.na(test))
## [1] 1230
Ada 12.718 NA pada data train dan 1.230 NA pada data test. Data ini tidak bisa langsung dihapus mengingat risiko yang besar untuk kehilangan informasi penting. Seningga akan dilakukan imputasi dengan metode KNN (K-Nearest Neighbor).
# Melihat jumlah NA di setiap kolom pada data Training
na_train = colSums(is.na(train))
na_train[na_train > 0] # Hanya tampilkan kolom yang ada NA-nya
## job marital education default housing loan
## 330 80 1731 8597 990 990
# Melihat jumlah NA di setiap kolom pada data Testing
na_test = colSums(is.na(test))
na_test[na_test > 0]
## job marital education default housing loan
## 39 11 167 803 105 105
Data kosong diisi dengan pendekatan KNN, akan memprediksi nilai kosong berdasarkan amatan lain yang mirip secara karakter/profilnya atau secara kemiripan di peubah X yang lain.
# KNN (K-Nearest Neighbor)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Imputasi data Testing
train_imputed = kNN(train, k = 5) # k=5 --> melihat 5 tetangga terdekat yang paling mirip
# Imputasi data Testing
test_imputed = kNN(test, k = 5)
# Hapus kolom tambahan yang dibuat oleh VIM
train = train_imputed[, 1:20] # Sesuaikan dengan jumlah kolom asli kamu
test = test_imputed[, 1:20]
# Sekarang cek NA
sum(is.na(train))
## [1] 0
# Menghapus hanya kolom yang berakhiran "_imp" hasil dari kNN
train = train_imputed[, !grepl("_imp", names(train_imputed))]
test = test_imputed[, !grepl("_imp", names(test_imputed))]
# Pastikan variabel y adalah faktor (penting untuk dummyVars)
train$y = as.factor(train$y)
test$y = as.factor(test$y)
names(train)
## [1] "age" "job" "marital" "education"
## [5] "default" "housing" "loan" "contact"
## [9] "month" "day_of_week" "duration" "campaign"
## [13] "pdays" "previous" "poutcome" "emp.var.rate"
## [17] "cons.price.idx" "cons.conf.idx" "euribor3m" "nr.employed"
## [21] "y"
"y" %in% names(train)
## [1] TRUE
Mengubah kategori menjadi numerik untuk bisa dibaca oleh ANN.
# Mengubah kolom kategori menjadi biner (1 atau 0)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Buat dummy utnuk peubah prediktor
dmy = dummyVars(" ~ . -y", data = train) # dummy kecuali untuk Y
train.x = data.frame(predict(dmy, newdata = train))
test.x = data.frame(predict(dmy, newdata = test))
# Masukkan Y kembali ke dalam data
train.y = ifelse(train$y == "yes", 1, 0)
test.y = ifelse(test$y == "yes", 1, 0)
# Menggabungkan kembali data
train = cbind(train.x, y = train.y)
test = cbind(test.x, y = test.y)
# Cek kembali data
head(train)
## age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 56 0 0 0 1 0
## 2 57 0 0 0 0 0
## 3 37 0 0 0 0 0
## 4 40 1 0 0 0 0
## 5 56 0 0 0 0 0
## 6 45 0 0 0 0 0
## jobretired jobself.employed jobservices jobstudent jobtechnician
## 1 0 0 0 0 0
## 2 0 0 1 0 0
## 3 0 0 1 0 0
## 4 0 0 0 0 0
## 5 0 0 1 0 0
## 6 0 0 1 0 0
## jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1 0 0 1 0 1
## 2 0 0 1 0 0
## 3 0 0 1 0 0
## 4 0 0 1 0 0
## 5 0 0 1 0 0
## 6 0 0 1 0 0
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1 0 0 0 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 1 0 0 0
## 5 0 0 1 0
## 6 0 1 0 0
## educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1 0 0 1 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 0 1 0
## 5 0 0 1 0
## 6 0 0 1 0
## housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1 1 0 1 0 0 1 0
## 2 1 0 1 0 0 1 0
## 3 0 1 1 0 0 1 0
## 4 1 0 1 0 0 1 0
## 5 1 0 0 1 0 1 0
## 6 1 0 1 0 0 1 0
## monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1 0 0 0 0 0 1 0 0
## 2 0 0 0 0 0 1 0 0
## 3 0 0 0 0 0 1 0 0
## 4 0 0 0 0 0 1 0 0
## 5 0 0 0 0 0 1 0 0
## 6 0 0 0 0 0 1 0 0
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1 0 0 1 0 0
## 2 0 0 1 0 0
## 3 0 0 1 0 0
## 4 0 0 1 0 0
## 5 0 0 1 0 0
## 6 0 0 1 0 0
## day_of_weekwed duration campaign pdays previous poutcomefailure
## 1 0 261 1 999 0 0
## 2 0 149 1 999 0 0
## 3 0 226 1 999 0 0
## 4 0 151 1 999 0 0
## 5 0 307 1 999 0 0
## 6 0 198 1 999 0 0
## poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1 1 0 1.1 93.994 -36.4
## 2 1 0 1.1 93.994 -36.4
## 3 1 0 1.1 93.994 -36.4
## 4 1 0 1.1 93.994 -36.4
## 5 1 0 1.1 93.994 -36.4
## 6 1 0 1.1 93.994 -36.4
## euribor3m nr.employed y
## 1 4.857 5191 0
## 2 4.857 5191 0
## 3 4.857 5191 0
## 4 4.857 5191 0
## 5 4.857 5191 0
## 6 4.857 5191 0
head(test)
## age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 30 0 1 0 0 0
## 2 39 0 0 0 0 0
## 3 25 0 0 0 0 0
## 4 38 0 0 0 0 0
## 5 47 1 0 0 0 0
## 6 32 0 0 0 0 0
## jobretired jobself.employed jobservices jobstudent jobtechnician
## 1 0 0 0 0 0
## 2 0 0 1 0 0
## 3 0 0 1 0 0
## 4 0 0 1 0 0
## 5 0 0 0 0 0
## 6 0 0 1 0 0
## jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1 0 0 1 0 0
## 2 0 0 0 1 0
## 3 0 0 1 0 0
## 4 0 0 1 0 0
## 5 0 0 1 0 0
## 6 0 0 0 1 0
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1 0 1 0 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 1 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1 0 0 1 0
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 0 1 0
## 5 0 1 1 0
## 6 0 1 1 0
## housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1 0 1 1 0 1 0 0
## 2 1 0 1 0 0 1 0
## 3 0 1 1 0 0 1 0
## 4 0 1 1 0 0 1 0
## 5 0 1 1 0 1 0 0
## 6 1 0 1 0 1 0 0
## monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1 0 0 0 0 0 1 0 0
## 2 0 0 0 0 0 1 0 0
## 3 0 0 0 1 0 0 0 0
## 4 0 0 0 1 0 0 0 0
## 5 0 0 0 0 0 0 1 0
## 6 0 0 0 0 0 0 0 0
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1 0 1 0 0 0
## 2 0 1 0 0 0
## 3 0 0 0 0 0
## 4 0 1 0 0 0
## 5 0 0 1 0 0
## 6 1 0 0 1 0
## day_of_weekwed duration campaign pdays previous poutcomefailure
## 1 0 487 2 999 0 0
## 2 0 346 4 999 0 0
## 3 1 227 1 999 0 0
## 4 0 17 3 999 0 0
## 5 0 58 1 999 0 0
## 6 0 128 3 999 2 1
## poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1 1 0 -1.8 92.893 -46.2
## 2 1 0 1.1 93.994 -36.4
## 3 1 0 1.4 94.465 -41.8
## 4 1 0 1.4 94.465 -41.8
## 5 1 0 -0.1 93.200 -42.0
## 6 0 0 -1.1 94.199 -37.5
## euribor3m nr.employed y
## 1 1.313 5099.1 0
## 2 4.855 5191.0 0
## 3 4.962 5228.1 0
## 4 4.959 5228.1 0
## 5 4.191 5195.8 0
## 6 0.884 4963.6 0
Min-Max scalling untuk mengubah nilai dalam rentang 0 s.d 1, agar dalam penghitungan setiap variabelnya dalam skala yang sama.
# Normalisasi
normalize = function(x){
return((x - min(x)) / (max(x) - min(x)))
}
# Lakukan pada peubah X
train_norm = as.data.frame(lapply(train, normalize))
test_norm = as.data.frame(lapply(test, normalize))
summary(train_norm)
## age jobadmin. jobblue.collar jobentrepreneur
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.1852 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.2593 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.2842 Mean :0.2541 Mean :0.2285 Mean :0.03545
## 3rd Qu.:0.3704 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## jobhousemaid jobmanagement jobretired jobself.employed
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.02586 Mean :0.07128 Mean :0.0422 Mean :0.03455
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## jobservices jobstudent jobtechnician jobunemployed
## Min. :0.00000 Min. :0.00000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.000 Median :0.00000
## Mean :0.09699 Mean :0.02132 Mean :0.165 Mean :0.02472
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.000 Max. :1.00000
## maritaldivorced maritalmarried maritalsingle educationbasic.4y
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.112 Mean :0.6068 Mean :0.2812 Mean :0.1082
## 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.000000
## Mean :0.05776 Mean :0.1546 Mean :0.2411 Mean :0.000437
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.000000
## educationprofessional.course educationuniversity.degree defaultno
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1312 Mean :0.3067 Mean :0.9999
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## defaultyes housingno housingyes loanno
## Min. :0.000e+00 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000e+00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.000e+00 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :7.284e-05 Mean :0.4634 Mean :0.5366 Mean :0.8478
## 3rd Qu.:0.000e+00 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000e+00 Max. :1.0000 Max. :1.0000 Max. :1.0000
## loanyes contactcellular contacttelephone monthapr
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.1522 Mean :0.6347 Mean :0.3653 Mean :0.0639
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## monthaug monthdec monthjul monthjun
## Min. :0.00 Min. :0.000000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00 Median :0.000000 Median :0.0000 Median :0.0000
## Mean :0.15 Mean :0.004419 Mean :0.1742 Mean :0.1291
## 3rd Qu.:0.00 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00 Max. :1.000000 Max. :1.0000 Max. :1.0000
## monthmar monthmay monthnov monthoct
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.01326 Mean :0.3343 Mean :0.09957 Mean :0.01743
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu
## Min. :0.00000 Min. :0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00 Median :0.0000 Median :0.0000
## Mean :0.01384 Mean :0.19 Mean :0.2067 Mean :0.2094
## 3rd Qu.:0.00000 3rd Qu.:0.00 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00 Max. :1.0000 Max. :1.0000
## day_of_weektue day_of_weekwed duration campaign
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.02074 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.03660 Median :0.01818
## Mean :0.1964 Mean :0.1975 Mean :0.05252 Mean :0.02850
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.06486 3rd Qu.:0.03636
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## pdays previous poutcomefailure poutcomenonexistent
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.00000 Median :0.0000 Median :1.0000
## Mean :0.9634 Mean :0.02471 Mean :0.1032 Mean :0.8634
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.3333 1st Qu.:0.3406 1st Qu.:0.3389
## Median :0.00000 Median :0.9375 Median :0.6033 Median :0.3766
## Mean :0.03333 Mean :0.7254 Mean :0.5357 Mean :0.4309
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.6988 3rd Qu.:0.6025
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## euribor3m nr.employed y
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1610 1st Qu.:0.5123 1st Qu.:0.0000
## Median :0.9574 Median :0.8597 Median :0.0000
## Mean :0.6772 Mean :0.7691 Mean :0.1127
## 3rd Qu.:0.9810 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
summary(test_norm)
## age jobadmin. jobblue.collar jobentrepreneur
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.2000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.2857 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.3159 Mean :0.2464 Mean :0.2195 Mean :0.03593
## 3rd Qu.:0.4143 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## jobhousemaid jobmanagement jobretired jobself.employed
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.02695 Mean :0.07939 Mean :0.04054 Mean :0.0386
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## jobservices jobstudent jobtechnician jobunemployed
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.09663 Mean :0.01991 Mean :0.1692 Mean :0.02695
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## maritaldivorced maritalmarried maritalsingle educationbasic.4y
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.000
## Mean :0.1083 Mean :0.6113 Mean :0.2804 Mean :0.109
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000000
## Mean :0.0573 Mean :0.1466 Mean :0.2314 Mean :0.0002428
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000000
## educationprofessional.course educationuniversity.degree defaultno
## Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.0000
## Median :0.0000 Median :0.000 Median :1.0000
## Mean :0.1345 Mean :0.321 Mean :0.9998
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000
## defaultyes housingno housingyes loanno
## Min. :0.0000000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000000 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :0.0002428 Mean :0.4574 Mean :0.5426 Mean :0.8376
## 3rd Qu.:0.0000000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## loanyes contactcellular contacttelephone monthapr
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.1624 Mean :0.6438 Mean :0.3562 Mean :0.0522
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## monthaug monthdec monthjul monthjun
## Min. :0.0000 Min. :0.000000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000000 Median :0.0000 Median :0.0000
## Mean :0.1544 Mean :0.005341 Mean :0.1726 Mean :0.1287
## 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000000 Max. :1.0000 Max. :1.0000
## monthmar monthmay monthnov monthoct
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.01165 Mean :0.3345 Mean :0.1083 Mean :0.01675
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.01554 Mean :0.1865 Mean :0.2076 Mean :0.2088
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## day_of_weektue day_of_weekwed duration campaign
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.02827 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :0.04968 Median :0.02941
## Mean :0.2042 Mean :0.193 Mean :0.07049 Mean :0.04521
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.08702 3rd Qu.:0.05882
## Max. :1.0000 Max. :1.000 Max. :1.00000 Max. :1.00000
## pdays previous poutcomefailure poutcomenonexistent
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.00000 Median :0.0000 Median :1.0000
## Mean :0.9614 Mean :0.03172 Mean :0.1102 Mean :0.8553
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.3333 1st Qu.:0.3406 1st Qu.:0.3389
## Median :0.00000 Median :0.9375 Median :0.6033 Median :0.3766
## Mean :0.03447 Mean :0.7260 Mean :0.5373 Mean :0.4310
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.6988 3rd Qu.:0.6025
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## euribor3m nr.employed y
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1585 1st Qu.:0.5123 1st Qu.:0.0000
## Median :0.9574 Median :0.8597 Median :0.0000
## Mean :0.6772 Mean :0.7670 Mean :0.1095
## 3rd Qu.:0.9810 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
Menggunakan package neuralnet dengan 5 neuron dipilih
untuk menjaga model tetap stabil dengan kondisi data ini dan 1 layer
dipilih karena dalam analisis ini dipakai rprop yang cukup stabil dan
robust (* akan terbukti di hasil analisisnya nanti pada data ini).
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
set.seed(333)
n = neuralnet(y ~ .,
data = train_norm, # Pakai data yang sudah dinormalisasi
hidden = 5, # 5 neuron dengan 1 hidden layer
err.fct = "ce",
linear.output = FALSE,
lifesign = 'full', # Akan memunculkan progress hitungan di console
rep = 1, # Melakukan pengulangan 1x
threshold = 0.5,
algorithm = "rprop+", # Backpropagation
stepmax = 1000000) # Stepmax diperbesar agar model bisa bekerja lebih baik
## hidden: 5 thresh: 0.5 rep: 1/1 steps:
## 1000 min thresh: 4.51756192957293
## 2000 min thresh: 2.6940431483678
## 3000 min thresh: 2.04183640530384
## 4000 min thresh: 1.53158637503712
## 5000 min thresh: 1.22760204798536
## 6000 min thresh: 1.07019060866538
## 7000 min thresh: 1.02445317332697
## 8000 min thresh: 1.01035960120116
## 9000 min thresh: 1.01035960120116
## 10000 min thresh: 1.01035960120116
## 11000 min thresh: 1.01035960120116
## 12000 min thresh: 1.01035960120116
## 13000 min thresh: 1.01035960120116
## 14000 min thresh: 1.01035960120116
## 15000 min thresh: 1.01035960120116
## 16000 min thresh: 1.01035960120116
## 17000 min thresh: 1.01035960120116
## 18000 min thresh: 1.01035960120116
## 19000 min thresh: 1.01035960120116
## 20000 min thresh: 1.01035960120116
## 21000 min thresh: 1.00331206301678
## 22000 min thresh: 0.950637281701579
## 23000 min thresh: 0.869661429369372
## 24000 min thresh: 0.830525055428064
## 25000 min thresh: 0.737456086044859
## 26000 min thresh: 0.737456086044859
## 27000 min thresh: 0.648000240545284
## 28000 min thresh: 0.648000240545284
## 29000 min thresh: 0.629067224711656
## 30000 min thresh: 0.614562138274845
## 31000 min thresh: 0.576874195513038
## 32000 min thresh: 0.576874195513038
## 33000 min thresh: 0.547752033409284
## 34000 min thresh: 0.540854212175597
## 35000 min thresh: 0.535587507244335
## 35357 error: 6974.57901 time: 1.08 hours
Model melakukan 22.331 kali iterasi backpropagation untuk mengevaluasi dan memperbaiki bobot. Error yang dihasilkan adalah 6927.83 (ce = Cross-Entropy), nilai ini merupakan akumulasi dari seluruh observasi data train sebanyak 41.188 amatan, nilai ini cukup wajar jika dilihat dari banyaknya data. Karena jika dirata-ratakan dengan data, maka rataan errornya adalah 6.927,83/41.188 = 0,168.
summary(n)
## Length Class Mode
## call 11 -none- call
## response 41188 -none- numeric
## covariate 2347716 -none- numeric
## model.list 2 -none- list
## err.fct 1 -none- function
## act.fct 1 -none- function
## linear.output 1 -none- logical
## data 58 data.frame list
## exclude 0 -none- NULL
## net.result 1 -none- list
## weights 1 -none- list
## generalized.weights 1 -none- list
## startweights 1 -none- list
## result.matrix 299 -none- numeric
# Visualisasi Plot Neural Network
plot(n, rep = 1)
# Error
n$result.matrix
## [,1]
## error 6.974579e+03
## reached.threshold 4.846096e-01
## steps 3.535700e+04
## Intercept.to.1layhid1 -3.943175e-02
## age.to.1layhid1 -3.380416e-01
## jobadmin..to.1layhid1 -8.537146e-01
## jobblue.collar.to.1layhid1 -5.787506e-01
## jobentrepreneur.to.1layhid1 5.122906e-02
## jobhousemaid.to.1layhid1 -1.414206e+00
## jobmanagement.to.1layhid1 -1.147781e-01
## jobretired.to.1layhid1 -1.825574e+00
## jobself.employed.to.1layhid1 -9.514744e-01
## jobservices.to.1layhid1 -5.352231e-01
## jobstudent.to.1layhid1 -1.267220e+00
## jobtechnician.to.1layhid1 -1.010304e+00
## jobunemployed.to.1layhid1 -1.280660e+00
## maritaldivorced.to.1layhid1 -3.348284e-01
## maritalmarried.to.1layhid1 -3.470448e-01
## maritalsingle.to.1layhid1 -4.468002e-01
## educationbasic.4y.to.1layhid1 3.652409e-01
## educationbasic.6y.to.1layhid1 2.898725e-01
## educationbasic.9y.to.1layhid1 4.590176e-01
## educationhigh.school.to.1layhid1 2.291217e-01
## educationilliterate.to.1layhid1 -1.091405e+00
## educationprofessional.course.to.1layhid1 -2.692769e-02
## educationuniversity.degree.to.1layhid1 -5.980826e-01
## defaultno.to.1layhid1 -1.579376e-02
## defaultyes.to.1layhid1 4.763746e+01
## housingno.to.1layhid1 9.178156e-01
## housingyes.to.1layhid1 6.431407e-01
## loanno.to.1layhid1 2.282163e+00
## loanyes.to.1layhid1 2.150783e+00
## contactcellular.to.1layhid1 -1.770463e+00
## contacttelephone.to.1layhid1 -2.532077e+00
## monthapr.to.1layhid1 8.152909e+00
## monthaug.to.1layhid1 1.265208e+00
## monthdec.to.1layhid1 4.940613e+00
## monthjul.to.1layhid1 -1.777568e+00
## monthjun.to.1layhid1 -7.091628e+02
## monthmar.to.1layhid1 1.211384e+00
## monthmay.to.1layhid1 4.029653e-01
## monthnov.to.1layhid1 -4.664547e+00
## monthoct.to.1layhid1 -7.153076e+02
## monthsep.to.1layhid1 2.670720e+00
## day_of_weekfri.to.1layhid1 1.282531e+00
## day_of_weekmon.to.1layhid1 1.170757e+00
## day_of_weekthu.to.1layhid1 6.745973e-01
## day_of_weektue.to.1layhid1 2.372688e-01
## day_of_weekwed.to.1layhid1 2.297081e-01
## duration.to.1layhid1 -1.718646e+01
## campaign.to.1layhid1 2.322730e+00
## pdays.to.1layhid1 9.301920e-01
## previous.to.1layhid1 1.552640e+00
## poutcomefailure.to.1layhid1 2.470593e+00
## poutcomenonexistent.to.1layhid1 1.926799e+00
## poutcomesuccess.to.1layhid1 1.984421e+00
## emp.var.rate.to.1layhid1 -3.418270e+01
## cons.price.idx.to.1layhid1 1.348409e+01
## cons.conf.idx.to.1layhid1 -8.036387e+00
## euribor3m.to.1layhid1 4.020901e+01
## nr.employed.to.1layhid1 -1.976946e+01
## Intercept.to.1layhid2 -5.369323e-01
## age.to.1layhid2 -3.768306e-01
## jobadmin..to.1layhid2 -8.018530e-01
## jobblue.collar.to.1layhid2 -7.259581e-01
## jobentrepreneur.to.1layhid2 -4.221833e-01
## jobhousemaid.to.1layhid2 -6.964847e-01
## jobmanagement.to.1layhid2 -6.759772e-01
## jobretired.to.1layhid2 -9.358644e-01
## jobself.employed.to.1layhid2 -7.831878e-01
## jobservices.to.1layhid2 -9.095970e-01
## jobstudent.to.1layhid2 -1.123828e+00
## jobtechnician.to.1layhid2 -8.650229e-01
## jobunemployed.to.1layhid2 -9.647325e-01
## maritaldivorced.to.1layhid2 -1.024932e+00
## maritalmarried.to.1layhid2 -9.655192e-01
## maritalsingle.to.1layhid2 -1.040302e+00
## educationbasic.4y.to.1layhid2 -2.063645e-01
## educationbasic.6y.to.1layhid2 1.350353e-01
## educationbasic.9y.to.1layhid2 -2.313404e-01
## educationhigh.school.to.1layhid2 -3.089856e-02
## educationilliterate.to.1layhid2 -1.242103e+00
## educationprofessional.course.to.1layhid2 -2.990263e-01
## educationuniversity.degree.to.1layhid2 -3.521732e-01
## defaultno.to.1layhid2 -5.233563e-02
## defaultyes.to.1layhid2 3.707040e+01
## housingno.to.1layhid2 -8.958838e-01
## housingyes.to.1layhid2 -9.591289e-01
## loanno.to.1layhid2 -1.049520e-01
## loanyes.to.1layhid2 -2.909156e-03
## contactcellular.to.1layhid2 -9.407004e-02
## contacttelephone.to.1layhid2 -6.652356e-01
## monthapr.to.1layhid2 -7.080869e+02
## monthaug.to.1layhid2 -9.220796e-01
## monthdec.to.1layhid2 -7.095184e+02
## monthjul.to.1layhid2 -9.155665e-01
## monthjun.to.1layhid2 -5.018353e-01
## monthmar.to.1layhid2 -1.128624e+00
## monthmay.to.1layhid2 -2.473248e-01
## monthnov.to.1layhid2 -3.944530e-03
## monthoct.to.1layhid2 -7.085860e+02
## monthsep.to.1layhid2 -1.534647e+00
## day_of_weekfri.to.1layhid2 -7.606146e-02
## day_of_weekmon.to.1layhid2 8.339708e-02
## day_of_weekthu.to.1layhid2 5.720414e-02
## day_of_weektue.to.1layhid2 1.521139e-01
## day_of_weekwed.to.1layhid2 1.098206e-01
## duration.to.1layhid2 -2.236398e+01
## campaign.to.1layhid2 1.372108e+00
## pdays.to.1layhid2 -6.679716e-02
## previous.to.1layhid2 -1.705194e+00
## poutcomefailure.to.1layhid2 2.026577e-01
## poutcomenonexistent.to.1layhid2 -2.186116e-01
## poutcomesuccess.to.1layhid2 -3.454041e-01
## emp.var.rate.to.1layhid2 3.089305e+00
## cons.price.idx.to.1layhid2 8.762296e-01
## cons.conf.idx.to.1layhid2 1.525068e+00
## euribor3m.to.1layhid2 -4.482827e+00
## nr.employed.to.1layhid2 5.739464e+00
## Intercept.to.1layhid3 -6.078767e-01
## age.to.1layhid3 6.491314e-01
## jobadmin..to.1layhid3 3.881111e-03
## jobblue.collar.to.1layhid3 3.538709e-01
## jobentrepreneur.to.1layhid3 -3.382814e-02
## jobhousemaid.to.1layhid3 4.971060e-01
## jobmanagement.to.1layhid3 -4.532082e-01
## jobretired.to.1layhid3 2.263087e-01
## jobself.employed.to.1layhid3 6.290454e-02
## jobservices.to.1layhid3 -2.219427e-01
## jobstudent.to.1layhid3 5.021298e-01
## jobtechnician.to.1layhid3 1.651778e-01
## jobunemployed.to.1layhid3 2.148394e-01
## maritaldivorced.to.1layhid3 -1.458897e+00
## maritalmarried.to.1layhid3 -1.626457e+00
## maritalsingle.to.1layhid3 -1.456964e+00
## educationbasic.4y.to.1layhid3 -8.804237e-01
## educationbasic.6y.to.1layhid3 -6.056060e-01
## educationbasic.9y.to.1layhid3 -4.398461e-01
## educationhigh.school.to.1layhid3 -6.445914e-01
## educationilliterate.to.1layhid3 -7.122754e+02
## educationprofessional.course.to.1layhid3 -3.683024e-01
## educationuniversity.degree.to.1layhid3 -1.562856e-01
## defaultno.to.1layhid3 1.693844e+00
## defaultyes.to.1layhid3 4.365319e+01
## housingno.to.1layhid3 -8.760821e-01
## housingyes.to.1layhid3 -8.368634e-01
## loanno.to.1layhid3 5.752606e-01
## loanyes.to.1layhid3 4.435606e-01
## contactcellular.to.1layhid3 5.741348e-01
## contacttelephone.to.1layhid3 1.733312e+00
## monthapr.to.1layhid3 7.207845e-01
## monthaug.to.1layhid3 1.349846e+00
## monthdec.to.1layhid3 2.360126e+00
## monthjul.to.1layhid3 1.214665e+00
## monthjun.to.1layhid3 1.188027e+00
## monthmar.to.1layhid3 1.175181e+00
## monthmay.to.1layhid3 -1.944711e-01
## monthnov.to.1layhid3 3.761273e-01
## monthoct.to.1layhid3 1.839257e+00
## monthsep.to.1layhid3 1.544431e+00
## day_of_weekfri.to.1layhid3 -6.147338e-02
## day_of_weekmon.to.1layhid3 1.577726e-01
## day_of_weekthu.to.1layhid3 1.357085e-01
## day_of_weektue.to.1layhid3 -3.912529e-01
## day_of_weekwed.to.1layhid3 -4.310578e-02
## duration.to.1layhid3 -7.800396e+01
## campaign.to.1layhid3 6.034265e+00
## pdays.to.1layhid3 2.928693e-01
## previous.to.1layhid3 2.031599e-02
## poutcomefailure.to.1layhid3 -4.314237e-01
## poutcomenonexistent.to.1layhid3 -5.011482e-01
## poutcomesuccess.to.1layhid3 -1.028814e+00
## emp.var.rate.to.1layhid3 6.970610e-01
## cons.price.idx.to.1layhid3 3.663608e-01
## cons.conf.idx.to.1layhid3 -6.334230e-01
## euribor3m.to.1layhid3 -3.042397e+00
## nr.employed.to.1layhid3 -8.080660e-01
## Intercept.to.1layhid4 2.424285e+00
## age.to.1layhid4 -2.493691e-01
## jobadmin..to.1layhid4 -1.351393e-01
## jobblue.collar.to.1layhid4 -7.612504e-01
## jobentrepreneur.to.1layhid4 1.751436e+00
## jobhousemaid.to.1layhid4 -1.917219e+00
## jobmanagement.to.1layhid4 -1.318798e-01
## jobretired.to.1layhid4 -6.981192e-01
## jobself.employed.to.1layhid4 -1.301138e+00
## jobservices.to.1layhid4 3.176494e-02
## jobstudent.to.1layhid4 -4.157002e-01
## jobtechnician.to.1layhid4 -2.442118e-01
## jobunemployed.to.1layhid4 -1.431643e+00
## maritaldivorced.to.1layhid4 -2.503322e-01
## maritalmarried.to.1layhid4 3.279044e-02
## maritalsingle.to.1layhid4 -3.391472e-02
## educationbasic.4y.to.1layhid4 4.212548e-01
## educationbasic.6y.to.1layhid4 3.636992e+00
## educationbasic.9y.to.1layhid4 8.225709e-01
## educationhigh.school.to.1layhid4 6.971041e-01
## educationilliterate.to.1layhid4 -4.688641e+01
## educationprofessional.course.to.1layhid4 5.656821e-01
## educationuniversity.degree.to.1layhid4 6.484980e-01
## defaultno.to.1layhid4 2.881661e+00
## defaultyes.to.1layhid4 -7.901246e+02
## housingno.to.1layhid4 7.699659e-01
## housingyes.to.1layhid4 4.208832e-01
## loanno.to.1layhid4 4.715331e-01
## loanyes.to.1layhid4 1.696820e+00
## contactcellular.to.1layhid4 1.387755e+00
## contacttelephone.to.1layhid4 -8.376486e-01
## monthapr.to.1layhid4 4.430051e+01
## monthaug.to.1layhid4 3.698935e+01
## monthdec.to.1layhid4 6.781209e+00
## monthjul.to.1layhid4 1.132885e+00
## monthjun.to.1layhid4 -2.189785e+00
## monthmar.to.1layhid4 2.514806e+01
## monthmay.to.1layhid4 -1.375355e+00
## monthnov.to.1layhid4 8.963554e+01
## monthoct.to.1layhid4 9.039989e+00
## monthsep.to.1layhid4 -2.099866e+00
## day_of_weekfri.to.1layhid4 3.971958e-01
## day_of_weekmon.to.1layhid4 9.094016e-01
## day_of_weekthu.to.1layhid4 -8.500766e-01
## day_of_weektue.to.1layhid4 -2.764588e-02
## day_of_weekwed.to.1layhid4 3.729230e-01
## duration.to.1layhid4 2.210497e+01
## campaign.to.1layhid4 7.326520e+00
## pdays.to.1layhid4 -1.630761e+01
## previous.to.1layhid4 -9.858499e+01
## poutcomefailure.to.1layhid4 1.417611e+01
## poutcomenonexistent.to.1layhid4 3.544579e-01
## poutcomesuccess.to.1layhid4 7.931191e+01
## emp.var.rate.to.1layhid4 -1.589710e+00
## cons.price.idx.to.1layhid4 2.319144e+01
## cons.conf.idx.to.1layhid4 1.074224e+01
## euribor3m.to.1layhid4 -1.959874e+01
## nr.employed.to.1layhid4 6.535267e+00
## Intercept.to.1layhid5 -1.454103e+00
## age.to.1layhid5 1.181234e+02
## jobadmin..to.1layhid5 1.023500e+00
## jobblue.collar.to.1layhid5 -3.077073e+01
## jobentrepreneur.to.1layhid5 -4.986039e+01
## jobhousemaid.to.1layhid5 -8.542154e+01
## jobmanagement.to.1layhid5 -2.663899e+01
## jobretired.to.1layhid5 -4.273841e+01
## jobself.employed.to.1layhid5 6.261677e+00
## jobservices.to.1layhid5 4.192692e+01
## jobstudent.to.1layhid5 -1.288770e+01
## jobtechnician.to.1layhid5 1.258402e+01
## jobunemployed.to.1layhid5 3.086085e+00
## maritaldivorced.to.1layhid5 -5.577489e+00
## maritalmarried.to.1layhid5 -3.732894e+00
## maritalsingle.to.1layhid5 -1.333950e-02
## educationbasic.4y.to.1layhid5 -4.629823e+00
## educationbasic.6y.to.1layhid5 -1.470269e+01
## educationbasic.9y.to.1layhid5 1.215317e+01
## educationhigh.school.to.1layhid5 -4.019384e+01
## educationilliterate.to.1layhid5 -1.615688e+02
## educationprofessional.course.to.1layhid5 -1.474576e+01
## educationuniversity.degree.to.1layhid5 5.864750e-01
## defaultno.to.1layhid5 -2.432976e-01
## defaultyes.to.1layhid5 1.579997e+02
## housingno.to.1layhid5 -6.372306e+00
## housingyes.to.1layhid5 7.497248e-01
## loanno.to.1layhid5 4.565376e-01
## loanyes.to.1layhid5 2.516680e+01
## contactcellular.to.1layhid5 -1.111856e+01
## contacttelephone.to.1layhid5 3.210341e+01
## monthapr.to.1layhid5 4.448766e-03
## monthaug.to.1layhid5 -7.335852e+01
## monthdec.to.1layhid5 -8.244310e+01
## monthjul.to.1layhid5 -1.754346e+01
## monthjun.to.1layhid5 -1.363544e+01
## monthmar.to.1layhid5 -1.390232e+03
## monthmay.to.1layhid5 4.673555e+01
## monthnov.to.1layhid5 -3.607367e+00
## monthoct.to.1layhid5 3.065357e+02
## monthsep.to.1layhid5 2.099520e+01
## day_of_weekfri.to.1layhid5 3.612040e+00
## day_of_weekmon.to.1layhid5 8.958457e-02
## day_of_weekthu.to.1layhid5 -1.180417e+01
## day_of_weektue.to.1layhid5 -7.610428e-02
## day_of_weekwed.to.1layhid5 -1.614685e+01
## duration.to.1layhid5 1.095100e+03
## campaign.to.1layhid5 -1.481037e+02
## pdays.to.1layhid5 2.763216e-01
## previous.to.1layhid5 -4.454451e+02
## poutcomefailure.to.1layhid5 5.094644e+01
## poutcomenonexistent.to.1layhid5 1.474048e+00
## poutcomesuccess.to.1layhid5 -8.198139e+00
## emp.var.rate.to.1layhid5 7.904390e+00
## cons.price.idx.to.1layhid5 -2.241584e+02
## cons.conf.idx.to.1layhid5 -2.740269e+00
## euribor3m.to.1layhid5 4.490410e+00
## nr.employed.to.1layhid5 1.289901e+01
## Intercept.to.y -4.955647e-01
## 1layhid1.to.y -6.790349e+00
## 1layhid2.to.y -1.825784e+01
## 1layhid3.to.y -9.050872e+00
## 1layhid4.to.y 2.474229e+00
## 1layhid5.to.y -1.250951e+00
Output layer pada hasil ANN data train ini adalah ada sebanyak 58 neuron di sebelah kiri (sesuai dengan jumlah peubah X) dengan target adalah Y di sebelah kanan sebanyak 1 neuron.
Nilai biasnya adalah:
Intercept.to.1layhid1 1.444866e-01
Sedangkan variabel 1layhid adalah bobotnya dengan bobot terbesar adalah:
1layhid1.to.y -5.575475e+00
Yang artinya ini adalah kontributor terbesar. Kemudian nilai errornya adalah:
error 6.927831e+03
Dan Steps yang dicapai adalah:
steps 2.233100e+04
# Prediksi rating Neural Network Model
output = compute(n, rep = 1, train_norm[, -1])
head(output$net.result)
## [,1]
## [1,] 9.456773e-16
## [2,] 9.456773e-16
## [3,] 2.211753e-04
## [4,] 9.456773e-16
## [5,] 9.456773e-16
## [6,] 9.456773e-16
head(train_norm[1, ])
## age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 0.4814815 0 0 0 1 0
## jobretired jobself.employed jobservices jobstudent jobtechnician
## 1 0 0 0 0 0
## jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1 0 0 1 0 1
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1 0 0 0 0
## educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1 0 0 1 0
## housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1 1 0 1 0 0 1 0
## monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1 0 0 0 0 0 1 0 0
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1 0 0 1 0 0
## day_of_weekwed duration campaign pdays previous poutcomefailure
## 1 0 0.05307035 0 1 0 0
## poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1 1 0 0.9375 0.6987529 0.6025105
## euribor3m nr.employed y
## 1 0.9573793 0.8597353 0
head(test_norm[1, ])
## age jobadmin. jobblue.collar jobentrepreneur jobhousemaid jobmanagement
## 1 0.1714286 0 1 0 0 0
## jobretired jobself.employed jobservices jobstudent jobtechnician
## 1 0 0 0 0 0
## jobunemployed maritaldivorced maritalmarried maritalsingle educationbasic.4y
## 1 0 0 1 0 0
## educationbasic.6y educationbasic.9y educationhigh.school educationilliterate
## 1 0 1 0 0
## educationprofessional.course educationuniversity.degree defaultno defaultyes
## 1 0 0 1 0
## housingno housingyes loanno loanyes contactcellular contacttelephone monthapr
## 1 0 1 1 0 1 0 0
## monthaug monthdec monthjul monthjun monthmar monthmay monthnov monthoct
## 1 0 0 0 0 0 1 0 0
## monthsep day_of_weekfri day_of_weekmon day_of_weekthu day_of_weektue
## 1 0 1 0 0 0
## day_of_weekwed duration campaign pdays previous poutcomefailure
## 1 0 0.133681 0.02941176 1 0 0
## poutcomenonexistent poutcomesuccess emp.var.rate cons.price.idx cons.conf.idx
## 1 1 0 0.3333333 0.2696804 0.1924686
## euribor3m nr.employed y
## 1 0.1537415 0.5122873 0
# Cari peluang untuk data train
prob_train = compute(n, train_norm[, -which(names(train_norm) == "y")])$net.result
pred_y_train_f = ifelse(prob_train > 0.5, "Yes", "No") # menghasilkan biner 0 atau 1
pred_y_train_f = factor(pred_y_train_f, levels = c("No", "Yes"))
# Data Asil
aktual_y_train_f = factor(train$y, levels = c(0, 1), labels = c("No", "Yes"))
# Hitung eluang untuk data test
prob_test = compute(n, test_norm[, -which(names(test_norm) == "y")])$net.result
# Ubah menjadi factor
pred_y_test_f = ifelse(prob_test > 0.5, "Yes", "No")
pred_y_test_f = factor(pred_y_test_f, levels = c(0, 1), labels = c("No", "Yes"))
# Data asli
aktual_y_test_f = factor(test_norm$y, levels = c(0, 1), labels = c("No", "Yes"))
summary.train = confusionMatrix(pred_y_train_f, aktual_y_train_f, positive = "Yes", mode = "everything")
# Tampilkan Statistik Lengkap Training
cat("--- STATISTICS DATA TRAINING ---\n")
## --- STATISTICS DATA TRAINING ---
print(summary.train)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 35113 1870
## Yes 1435 2770
##
## Accuracy : 0.9198
## 95% CI : (0.9171, 0.9224)
## No Information Rate : 0.8873
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5815
##
## Mcnemar's Test P-Value : 4.378e-14
##
## Sensitivity : 0.59698
## Specificity : 0.96074
## Pos Pred Value : 0.65874
## Neg Pred Value : 0.94944
## Precision : 0.65874
## Recall : 0.59698
## F1 : 0.62634
## Prevalence : 0.11265
## Detection Rate : 0.06725
## Detection Prevalence : 0.10209
## Balanced Accuracy : 0.77886
##
## 'Positive' Class : Yes
##
Accuracy : 0.8902 (overall corectness)
Sensitivity : 0.034483 (finding positives)
Precision : 0.796020
F1 : 0.066102
Akurasi (0.89%): secara keseluruhan model berhasil menebak klasifikasi nasabah yang mau atau tidak berlangganan.
Precision (0.79%): Keakuratan tebakan model benar terhadap prediksinya cukup akurat dengan nilai 0.79% cukup tinggi.
Sensitivity/Recall (0.03%): Dari seluruh nasabah yang benar-benar mau berlangganan ditebak dengan benar oleh model sebesar 0.03%, nilai ini cukup kecil.
F-1 Score (0.06%): Nilai rataan harmonik antara precision dan recall, nilai ini menunjukkan bahwa 0.06% adalah sebagai performa model yang artinya model masih sangat kesulitan untuk menyeimbangkan antara False Positive dan False Negative.
# Tampilkan Statistik Lengkap Testing
cat("--- SUMMARY STATISTICS: DATA TESTING ---\n")
## --- SUMMARY STATISTICS: DATA TESTING ---
summary.test = confusionMatrix(pred_y_test_f, aktual_y_test_f,
positive = "Yes",
mode = "everything")
print(summary.test)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 0 0
## Yes 0 0
##
## Accuracy : NaN
## 95% CI : (NA, NA)
## No Information Rate : NA
## P-Value [Acc > NIR] : NA
##
## Kappa : NaN
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : NA
## Specificity : NA
## Pos Pred Value : NA
## Neg Pred Value : NA
## Precision : NA
## Recall : NA
## F1 : NA
## Prevalence : NaN
## Detection Rate : NaN
## Detection Prevalence : NaN
## Balanced Accuracy : NA
##
## 'Positive' Class : Yes
##
Accuracy : 0.9051 (overall corectness)
Precision : 0.54839
Sensitivity : 0.75388 (finding positives)
Specificity : 0.92366 (finding negatives)
F1 : 0.63492
Akurasi (0.90%): Secara keseluruhan, model menebak data test cukup baik dengan nilai akurasi yang cukup tinggi.
Precision (0.54%): Keakuratan tebakan model benar terhadap prediksinya cukup akurat dengan nilai 0.54% cukup tinggi.
Recall/Sensitivity (0.75%): Dari seluruh nasabah yang benar-benar mau berlangganan ditebak dengan benar oleh model sebesar 0.75%, nilai ini cukup kecil.
F-1 Score (0.63%): Nilai rataan harmonik antara precision dan recall, nilai ini menunjukkan bahwa 0.63% adalah sebagai performa model yang artinya model sudah cukup baik untuk menyeimbangkan antara False Positive dan False Negative.
# Ambil nilai akurasi dari summary
accuracy = summary.train$overall['Accuracy']
accuracy_test = summary.test$overall['Accuracy']
# Membuat data frame untuk grafik
performa = data.frame(
Dataset = c("Training", "Testing"),
Akurasi = c(accuracy, accuracy_test)
)
# Visualisasi
barplot(performa$Akurasi,
names.arg = performa$Dataset,
ylim = c(0, 1), col = c("skyblue", "orange"),
main = "Perbandingan Akurasi Model",
ylab = "Nilai Akurasi")
abline(h = 0.9, col = "red", lty = 2) # Garis batas 90%
Berdasarkan nilai akurasi dari nilai metrik menunjukkan bahwa model
masih kurang baik dalam menebak data train sedangkan model
sudah cukup baik dalam menebak data test. Hal ini
dibuktikan dengan nilai akurasi pada confussion matri yang menunjukkan
bahwa nilai metrik dalam data test lebih masuk akal.
Maksudnya, nilai pada data test ini selaras, ketika akurasi, recall/sensitivity, dan F-1 Score itu semua tinggi. Sedangkan pada data train, hanya akurasi dan presisi saja yang tinggi tapi nilai F1-Score dan sensitivity sangat rendah. Artinya pada data train model masih sulit untuk menebak target.
# Ambil hasil prediksi (dalam bentuk peluang)
prob_test = compute(n, test_norm[, -which(names(test_norm) == "y")])$net.result
# Ubah peluang menjadi label "Yes" atau "No" (Threshold 0.5)
prediksi_label = ifelse(prob_test > 0.5, "Yes", "No")
# abungkan dengan data test asli (sebelum normalisasi agar mudah dibaca)
hasil_akhir = data.frame(test, Prediksi_Model = prediksi_label)
# Jika ingin membuka jendela data khusus dua kolom ini
View(hasil_akhir[, c("age", "Prediksi_Model")])
# Jumlah dan persentase Yes dan No
library(dplyr)
hasil_akhir %>%
group_by(Prediksi_Model) %>%
summarise(Jumlah = n(),
Persentase = (n() / nrow(hasil_akhir)) * 100)
## # A tibble: 2 × 3
## Prediksi_Model Jumlah Persentase
## <chr> <int> <dbl>
## 1 No 3501 85.0
## 2 Yes 618 15.0
# Simpan hasil prediksi
write.csv(hasil_akhir, "Hasil_Prediksi_Nasabah.csv", row.names = FALSE)