library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
##     filter, lag
##
## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union
purchase <- read.csv("Customer_Behaviour.csv", stringsAsFactors = TRUE)
head(purchase)
##   Gender   Age Salary Purchased
## 1   Male  < 30    Low        No
## 2   Male 30-50    Low        No
## 3 Female  < 30 Medium        No
## 4 Female  < 30 Medium        No
## 5   Male  < 30 Medium        No
## 6   Male  < 30 Medium        No
Data description: the dataset contains 400 customer records with four categorical variables. Gender is Male or Female; Age is binned into < 30, 30-50, and > 50; Salary is binned into Low, Medium, and High; Purchased (No/Yes) is the target we want to predict.
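Before splitting the data, it is worth confirming that all four columns were read in as factors with the levels described above; a quick check with base R:

str(purchase)     # four factor columns, 400 observations
summary(purchase) # level counts per variable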
RNGkind(sample.kind = "Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
set.seed(100)
purchase_intrain <- sample(nrow(purchase), nrow(purchase)*0.8)
purchase_train <- purchase[purchase_intrain, ]
purchase_test <- purchase[-purchase_intrain, ]
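Note that sample() draws rows without regard to the target, so the class mix can drift slightly between train and test. If we wanted a stratified split instead, caret's createDataPartition() preserves the Yes/No proportion in both sets; a minimal sketch (not used in the rest of this analysis):

# stratified alternative to the random split above (sketch only)
library(caret)
set.seed(100)
idx <- createDataPartition(purchase$Purchased, p = 0.8, list = FALSE)
strat_train <- purchase[idx, ]
strat_test <- purchase[-idx, ]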
prop.table(table(purchase_train$Purchased))
##
## No Yes
## 0.640625 0.359375
# upsampling: duplicate minority-class rows until the classes are balanced
library(caret)
## Loading required package: ggplot2
## Warning in (function (kind = NULL, normal.kind = NULL, sample.kind = NULL) :
## non-uniform 'Rounding' sampler used
## Loading required package: lattice
RNGkind(sample.kind = "Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
set.seed(100)
purchase_train <- upSample(x = purchase_train %>% select(-Purchased),
                           y = purchase_train$Purchased,
                           yname = "Purchased")
prop.table(table(purchase_train$Purchased))
##
## No Yes
## 0.5 0.5
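caret also provides the mirror-image helper downSample(), which balances the classes by discarding majority-class rows rather than duplicating minority-class rows. A sketch applied to the original (still imbalanced) training split, in case duplicated rows were a concern:

# downsampling alternative: drop majority-class rows instead of
# duplicating minority-class rows (sketch only; not used below)
purchase_down <- downSample(x = purchase[purchase_intrain, ] %>% select(-Purchased),
                            y = purchase[purchase_intrain, "Purchased"],
                            yname = "Purchased")
prop.table(table(purchase_down$Purchased)) # also 0.5 / 0.5, but with fewer rows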
library(e1071)
naive_model <- naiveBayes(x = purchase_train %>% select(-Purchased),
                          y = purchase_train$Purchased)
naive_model
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = purchase_train %>% select(-Purchased),
## y = purchase_train$Purchased)
##
## A-priori probabilities:
## purchase_train$Purchased
## No Yes
## 0.5 0.5
##
## Conditional probabilities:
## Gender
## purchase_train$Purchased Female Male
## No 0.5170732 0.4829268
## Yes 0.5756098 0.4243902
##
## Age
## purchase_train$Purchased < 30 > 50 30-50
## No 0.346341463 0.009756098 0.643902439
## Yes 0.034146341 0.336585366 0.629268293
##
## Salary
## purchase_train$Purchased High Low Medium
## No 0.06829268 0.22439024 0.70731707
## Yes 0.48292683 0.29756098 0.21951220
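These tables are everything the model stores: a posterior is simply the prior multiplied by the matching conditional probabilities, renormalized across classes. A hand computation for one profile (Male, age 30-50, high salary), using the numbers printed above as a sanity check:

# manual posterior for (Male, 30-50, High); should match
# predict(naive_model, type = "raw") up to rounding
p_yes <- 0.5 * 0.4243902 * 0.629268293 * 0.48292683
p_no  <- 0.5 * 0.4829268 * 0.643902439 * 0.06829268
p_yes / (p_yes + p_no) # P(Purchased = Yes | profile), roughly 0.86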
prediction_naive_train <- predict(object = naive_model,   # the fitted model
                                  newdata = purchase_train,
                                  type = "class")          # return class labels
eval_naive_train <- confusionMatrix(prediction_naive_train,
                                    reference = purchase_train$Purchased,
                                    positive = "Yes")
prediction_naive_test <- predict(object = naive_model,
                                 newdata = purchase_test,
                                 type = "class")           # class labels again
eval_naive_test <- confusionMatrix(prediction_naive_test,
                                   reference = purchase_test$Purchased,
                                   positive = "Yes")
eval_naive_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 46 1
## Yes 6 27
##
## Accuracy : 0.9125
## 95% CI : (0.828, 0.9641)
## No Information Rate : 0.65
## P-Value [Acc > NIR] : 5.423e-08
##
## Kappa : 0.8153
##
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.9643
## Specificity : 0.8846
## Pos Pred Value : 0.8182
## Neg Pred Value : 0.9787
## Prevalence : 0.3500
## Detection Rate : 0.3375
## Detection Prevalence : 0.4125
## Balanced Accuracy : 0.9245
##
## 'Positive' Class : Yes
##
purchase_pred_prob <- predict(object = naive_model,
                              newdata = purchase_test,
                              type = "raw")
head(purchase_pred_prob)
## No Yes
## [1,] 0.8969466 0.10305340
## [2,] 0.9670607 0.03293928
## [3,] 0.9738155 0.02618454
## [4,] 0.8969466 0.10305340
## [5,] 0.9738155 0.02618454
## [6,] 0.4675379 0.53246208
data_roc_naive <- data.frame(pred_prob = purchase_pred_prob[, "Yes"],
                             actual = ifelse(purchase_test$Purchased == "Yes", 1, 0))
head(data_roc_naive)
## pred_prob actual
## 1 0.10305340 0
## 2 0.03293928 0
## 3 0.02618454 0
## 4 0.10305340 0
## 5 0.02618454 0
## 6 0.53246208 1
Prepare the ROC inputs with a prediction() object:
library(ROCR)
# prediction object
naive_roc <- prediction(predictions = data_roc_naive$pred_prob,
                        labels = data_roc_naive$actual)
# AUC value
naive_auc <- performance(naive_roc, measure = "auc")
naive_auc@y.values[[1]]
## [1] 0.9330357
plot(performance(naive_roc, "tpr", "fpr"))
abline(0, 1, lty = 2)
text(0.4, 0.6, paste("AUC = ", round(naive_auc@y.values[[1]], 2)))
library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
model_dt <- ctree(formula = Purchased ~ .,
                  data = purchase_train,
                  control = ctree_control(mincriterion = 0.95))
plot(model_dt, type = "simple")
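Here mincriterion = 0.95 means a split is only made when the association test is significant at 1 - 0.95 = 0.05. Raising it makes the stopping rule stricter and typically yields a shallower tree; a comparison sketch (not used below):

# stricter tree: each split must pass the test at alpha = 0.01
model_dt_strict <- ctree(Purchased ~ ., data = purchase_train,
                         control = ctree_control(mincriterion = 0.99))
plot(model_dt_strict, type = "simple")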
#### 5.2 Confusion Matrix
# predictions on the training data
pred_train_dt <- predict(model_dt, newdata = purchase_train)
confusionMatrix(pred_train_dt, reference = purchase_train$Purchased, positive = "Yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 167 28
## Yes 38 177
##
## Accuracy : 0.839
## 95% CI : (0.7998, 0.8733)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.678
##
## Mcnemar's Test P-Value : 0.2679
##
## Sensitivity : 0.8634
## Specificity : 0.8146
## Pos Pred Value : 0.8233
## Neg Pred Value : 0.8564
## Prevalence : 0.5000
## Detection Rate : 0.4317
## Detection Prevalence : 0.5244
## Balanced Accuracy : 0.8390
##
## 'Positive' Class : Yes
##
pred_test_dt <- predict(model_dt, newdata = purchase_test)
eval_dt_test <- confusionMatrix(pred_test_dt, reference = purchase_test$Purchased, positive = "Yes")
eval_dt_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 44 1
## Yes 8 27
##
## Accuracy : 0.8875
## 95% CI : (0.7972, 0.9472)
## No Information Rate : 0.65
## P-Value [Acc > NIR] : 1.228e-06
##
## Kappa : 0.7662
##
## Mcnemar's Test P-Value : 0.0455
##
## Sensitivity : 0.9643
## Specificity : 0.8462
## Pos Pred Value : 0.7714
## Neg Pred Value : 0.9778
## Prevalence : 0.3500
## Detection Rate : 0.3375
## Detection Prevalence : 0.4375
## Balanced Accuracy : 0.9052
##
## 'Positive' Class : Yes
##
purchase_pred_prob <- predict(object = model_dt,
                              newdata = purchase_test,
                              type = "prob")
head(purchase_pred_prob)
## No Yes
## 1 1.0000000 0.0000000
## 3 1.0000000 0.0000000
## 6 1.0000000 0.0000000
## 9 1.0000000 0.0000000
## 16 1.0000000 0.0000000
## 17 0.3898305 0.6101695
data_roc_dt <- data.frame(pred_prob = purchase_pred_prob[, "Yes"],
                          actual = ifelse(purchase_test$Purchased == "Yes", 1, 0))
head(data_roc_dt)
## pred_prob actual
## 1 0.0000000 0
## 3 0.0000000 0
## 6 0.0000000 0
## 9 0.0000000 0
## 16 0.0000000 0
## 17 0.6101695 1
Prepare the ROC inputs with prediction():
library(ROCR)
# prediction object
dt_roc <- prediction(predictions = data_roc_dt$pred_prob,
                     labels = data_roc_dt$actual)
# AUC value
dt_auc <- performance(dt_roc, measure = "auc")
dt_auc@y.values[[1]]
## [1] 0.9350962
plot(performance(dt_roc, "tpr", "fpr"))
abline(0, 1, lty = 2)
text(0.4, 0.6, paste("AUC = ", round(dt_auc@y.values[[1]], 2)))
### 6. Random Forest
set.seed(417)
ctrl <- trainControl(method = "repeatedcv",
                     number = 5,  # number of folds (k)
                     repeats = 3) # repetitions
purchase_forest <- train(Purchased ~ .,
                         data = purchase_train,
                         method = "rf", # random forest
                         trControl = ctrl)
## Warning in (function (kind = NULL, normal.kind = NULL, sample.kind = NULL) :
## non-uniform 'Rounding' sampler used
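The returned train object keeps the cross-validation results, so we can check which mtry value the repeated CV selected and which predictors the forest leans on most:

# tuning result and variable importance (caret's varImp works on train objects)
purchase_forest$bestTune
varImp(purchase_forest)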
pred_train_rf <- predict(purchase_forest, newdata = purchase_train)
confusionMatrix(pred_train_rf, reference = purchase_train$Purchased, positive = "Yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 167 28
## Yes 38 177
##
## Accuracy : 0.839
## 95% CI : (0.7998, 0.8733)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.678
##
## Mcnemar's Test P-Value : 0.2679
##
## Sensitivity : 0.8634
## Specificity : 0.8146
## Pos Pred Value : 0.8233
## Neg Pred Value : 0.8564
## Prevalence : 0.5000
## Detection Rate : 0.4317
## Detection Prevalence : 0.5244
## Balanced Accuracy : 0.8390
##
## 'Positive' Class : Yes
##
pred_test_rf <- predict(purchase_forest, newdata = purchase_test)
eval_rf_test <- confusionMatrix(pred_test_rf, reference = purchase_test$Purchased, positive = "Yes")
eval_rf_test
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 44 1
## Yes 8 27
##
## Accuracy : 0.8875
## 95% CI : (0.7972, 0.9472)
## No Information Rate : 0.65
## P-Value [Acc > NIR] : 1.228e-06
##
## Kappa : 0.7662
##
## Mcnemar's Test P-Value : 0.0455
##
## Sensitivity : 0.9643
## Specificity : 0.8462
## Pos Pred Value : 0.7714
## Neg Pred Value : 0.9778
## Prevalence : 0.3500
## Detection Rate : 0.3375
## Detection Prevalence : 0.4375
## Balanced Accuracy : 0.9052
##
## 'Positive' Class : Yes
##
purchase_pred_prob <- predict(object = purchase_forest,
                              newdata = purchase_test,
                              type = "prob")
head(purchase_pred_prob)
## No Yes
## 1 0.804 0.196
## 3 0.984 0.016
## 6 0.992 0.008
## 9 0.804 0.196
## 16 0.992 0.008
## 17 0.308 0.692
data_roc_rf <- data.frame(pred_prob = purchase_pred_prob[, "Yes"],
                          actual = ifelse(purchase_test$Purchased == "Yes", 1, 0))
head(data_roc_rf)
## pred_prob actual
## 1 0.196 0
## 2 0.016 0
## 3 0.008 0
## 4 0.196 0
## 5 0.008 0
## 6 0.692 1
Again, we prepare the ROC inputs with prediction():
library(ROCR)
# prediction object
rf_roc <- prediction(predictions = data_roc_rf$pred_prob,
                     labels = data_roc_rf$actual)
# AUC value
rf_auc <- performance(rf_roc, measure = "auc")
rf_auc@y.values[[1]]
## [1] 0.9138049
plot(performance(rf_roc, "tpr", "fpr"))
abline(0, 1, lty = 2)
text(0.4, 0.6, paste("AUC = ", round(rf_auc@y.values[[1]], 2)))
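With all three ROCR prediction objects in hand, overlaying the curves makes the comparison easier to read; a minimal sketch:

# overlay the three ROC curves computed above on one set of axes
plot(performance(naive_roc, "tpr", "fpr"), col = "blue")
plot(performance(dt_roc, "tpr", "fpr"), col = "red", add = TRUE)
plot(performance(rf_roc, "tpr", "fpr"), col = "darkgreen", add = TRUE)
abline(0, 1, lty = 2)
legend("bottomright",
       legend = c("Naive Bayes", "Decision Tree", "Random Forest"),
       col = c("blue", "red", "darkgreen"), lwd = 1)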
eval_naive_test <- tibble(Accuracy = eval_naive_test$overall[1],
                          Recall = eval_naive_test$byClass[1],
                          Specificity = eval_naive_test$byClass[2],
                          Precision = eval_naive_test$byClass[3],
                          AUC = naive_auc@y.values[[1]])
eval_dt_test <- tibble(Accuracy = eval_dt_test$overall[1],
                       Recall = eval_dt_test$byClass[1],
                       Specificity = eval_dt_test$byClass[2],
                       Precision = eval_dt_test$byClass[3],
                       AUC = dt_auc@y.values[[1]])
eval_rf_test <- tibble(Accuracy = eval_rf_test$overall[1],
                       Recall = eval_rf_test$byClass[1],
                       Specificity = eval_rf_test$byClass[2],
                       Precision = eval_rf_test$byClass[3],
                       AUC = rf_auc@y.values[[1]])
model_eval <- rbind("Naive Bayes" = eval_naive_test,
                    "Decision Tree" = eval_dt_test,
                    "Random Forest" = eval_rf_test)
as.data.frame(model_eval)
## Accuracy Recall Specificity Precision AUC
## Naive Bayes 0.9125 0.9642857 0.8846154 0.8181818 0.9330357
## Decision Tree 0.8875 0.9642857 0.8461538 0.7714286 0.9350962
## Random Forest 0.8875 0.9642857 0.8461538 0.7714286 0.9138049
Based on the confusion matrices on the test data, Naive Bayes is the best model for this case: it has the highest accuracy, specificity, and precision, ties the other two models on recall, and its AUC (0.933) is essentially on par with the decision tree's (0.935). Overall, though, all three models give good results, and any of them could reasonably be used for prediction.
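Finally, the chosen model can score new records directly, provided the factor levels match the training data. A usage sketch with one made-up customer profile:

# score a hypothetical new customer with the selected Naive Bayes model
new_customer <- data.frame(Gender = factor("Female", levels = levels(purchase$Gender)),
                           Age = factor("> 50", levels = levels(purchase$Age)),
                           Salary = factor("High", levels = levels(purchase$Salary)))
predict(naive_model, newdata = new_customer, type = "raw")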