-age
-job - type of job
(categorical:‘admin.’,‘blue-collar’,‘entrepreneur’,‘housemaid’,‘management’,‘retired’,‘self-employed’,‘services’,‘student’,‘technician’,‘unemployed’,‘unknown’)
-marital - marital status (categorical:
‘divorced’,‘married’,‘single’,‘unknown’; note: ‘divorced’ means divorced
or widowed)
-education - (categorical:
‘basic.4y’,‘basic.6y’,‘basic.9y’,‘high.school’,‘illiterate’,‘professional.course’,‘university.degree’,‘unknown’)
-default - has credit in default?
-balance - average yearly balance
-housing - has housing loan?
-loan - has personal loan?
-contact - contact communication type (categorical:
‘cellular’,‘telephone’) -day_of_week - last contact day of the week
-month - last contact month of year (categorical: ‘jan’, ‘feb’, ‘mar’,
…, ‘nov’, ‘dec’)
-duration - last contact duration, in seconds (numeric) -day - date of
the month -campaign - number of contacts performed during this campaign
and for this client (numeric, includes last contact)
-pdays - number of days that passed by after the client was last
contacted from a previous campaign (numeric; -1 means client was not
previously contacted) -previous - number of contacts performed before
this campaign and for this client
-poutcome - outcome of the previous marketing campaign (categorical:
‘failure’,‘nonexistent’,‘success’) -y - has the client subscribed a term
deposit?
library(readr)
library(dplyr)
library(rpart)
library(rpart.plot)
library(randomForest)
library(party)
library(caret)
library(caTools)
library(pROC)
bank.data = read_delim("bank.csv", delim = ";",
escape_double = FALSE, trim_ws = TRUE)
Inspecting the class of each variable
sapply(bank.data, class)
## age job marital education default balance
## "numeric" "character" "character" "character" "character" "numeric"
## housing loan contact day month duration
## "character" "character" "character" "numeric" "character" "numeric"
## campaign pdays previous poutcome y
## "numeric" "numeric" "numeric" "character" "character"
Adjusting all binary variables into factors
bank.data$default = as.factor(bank.data$default)
bank.data$housing = as.factor(bank.data$housing)
bank.data$loan = as.factor(bank.data$loan)
bank.data$y = as.factor(bank.data$y)
Checking for any NA values in the dataset
any(is.na(bank.data)) # Finding if there are any NA values in our dataset
## [1] FALSE
Upon inspection, there are unknown inputs across different columns in the dataset.
Some inputs are unknown. Therefore I will remove them since we have a relatively large number of observations
adj.bank.data = subset(bank.data, contact!= "unknown")
adj.bank.data = subset(adj.bank.data, job!= "unknown")
adj.bank.data = subset(adj.bank.data, marital!= "unknown")
adj.bank.data = subset(adj.bank.data, education!= "unknown")
adj.bank.data = subset(adj.bank.data, loan!= "unknown")
adj.bank.data = subset(adj.bank.data, month!= "unknown")
A large number of the outcomes of the previous marketing campaign are unknown which isn’t very useful in comparative analysis across levels.
print(sum(bank.data$poutcome == "unknown")) # Finding the number of outcomes of previous marketing campaign which are unknown
## [1] 3705
Remove unnecessary columns
adj.bank.data$poutcome = NULL
adj.bank.data$day = NULL # Removing date of the month
adj.bank.data$month = NULL
A large number of previous number of days that passed by after the client was last contacted from a previous campaign are -1, indicating most of the clients have not been contacted before. Therefore, I will remove pdays.
print(sum(bank.data$pdays == -1))
## [1] 3705
# Remove pdays column
adj.bank.data$pdays = NULL
Is the dataset imbalanced?
ggplot(data = adj.bank.data, mapping = aes(y=y))+
geom_bar(fill="orange")+
labs(x="Count", y="Result Type", title="Count of clients subscribing to a term deposit vs not subscribing")+
theme(plot.title = element_text(hjust = 0.5))
Massively imbalanced dataset where there more negative case is the majority class, and the positive case is the minority class.
Upsampling the minority class (yes to subscribing) to create a balanced dataset
balanced_data = upSample(x = adj.bank.data[, -which(names(adj.bank.data) == "y")],
y = adj.bank.data$y)
colnames(balanced_data)[colnames(balanced_data) == "Class"] = "y"
Double checking the dataset is balanced
table(balanced_data$y)
##
## no yes
## 2626 2626
Splitting the data into training and test sets
# Splitting data in train and test data
split = sample.split(balanced_data$y, SplitRatio = 0.7) # 70-30 split for training vs test set
train.data = subset(balanced_data, split == "TRUE") # Subsetting dataset into training set
test.data = subset(balanced_data, split == "FALSE") # Subsetting dataset into test set
test.y = test.data$y
Double checking the response is now balanced between positive and negative class.
table(train.data$y)
##
## no yes
## 1838 1838
Fitting a decision tree with all possible variables
tree_model = rpart(y ~ ., data = train.data, method = "class")
print(tree_model) # Inspecting the model
## n= 3676
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3676 1838 no (0.5000000 0.5000000)
## 2) duration< 212.5 1438 272 no (0.8108484 0.1891516) *
## 3) duration>=212.5 2238 672 yes (0.3002681 0.6997319)
## 6) duration< 638.5 1575 593 yes (0.3765079 0.6234921)
## 12) previous< 0.5 995 461 yes (0.4633166 0.5366834)
## 24) duration< 349.5 468 194 no (0.5854701 0.4145299) *
## 25) duration>=349.5 527 187 yes (0.3548387 0.6451613) *
## 13) previous>=0.5 580 132 yes (0.2275862 0.7724138) *
## 7) duration>=638.5 663 79 yes (0.1191554 0.8808446) *
plot(tree_model) # Visualising the tree
Creating predictions and confusion matrix based on the decision tree
predictions = predict(tree_model, newdata = test.data, type = "class") # Predictions
conf_matrix = confusionMatrix(predictions, test.y) # Building Confusion matrix
print(conf_matrix$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.7804569 0.7487310 0.7564576
## Neg Pred Value Precision Recall
## 0.7732634 0.7564576 0.7804569
## F1 Prevalence Detection Rate
## 0.7682698 0.5000000 0.3902284
## Detection Prevalence Balanced Accuracy
## 0.5158629 0.7645939
Not amazing predictive statistics from the decision tree, but we can do better.
How about a Random Forest method? Random Forest delivers highly accurate predictions even with large datasets, effectively handles missing data without sacrificing accuracy, eliminates the need for normalization or standardisation, and reduces the risk of overfitting by combining multiple decision trees.
rf.model = randomForest(y~., data = train.data, importance = TRUE)
Inspecting the random forest model
print(rf.model)
##
## Call:
## randomForest(formula = y ~ ., data = train.data, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 5.79%
## Confusion matrix:
## no yes class.error
## no 1634 204 0.110990207
## yes 9 1829 0.004896627
The OBB error rate suggests that the model performs well, as the error rate is relatively low.
Looking at the error rates
plot(rf.model)
The black line in the plot represents the overall OOB (Out-of-Bag) error
rate, while the red dashed line corresponds to the error rate for the
negative class The green dashed line indicates the error rate for the
positive class As the number of trees increases, the error rates
stabilise, showing that the random forest has enough trees to generalise
effectively. The overall OOB error, represented by the black line,
stabilises around 5%, which aligns with the previously observed result
of 5.41%. For the class-specific error rates, the error for the “no”
class, shown by the red dashed line, is higher and stabilizes around
10%, reflecting the higher class error seen in the confusion matrix. On
the other hand, the error for the “yes” class, depicted by the green
dashed line, is significantly lower, stabilising near 0.6%, which is
consistent with its strong predictive performance.
Checking the important features
# Feature importance
importance(rf.model)
## no yes MeanDecreaseAccuracy MeanDecreaseGini
## age 5.9164637 108.23247 100.46208 219.257887
## job 0.8564489 73.25825 71.50528 106.964251
## marital -1.3525856 57.73530 56.14942 54.466245
## education 5.1809022 55.99843 56.27253 51.678981
## default 1.3002853 19.60377 19.16738 4.155275
## balance 2.7511902 85.99423 83.82398 253.325131
## housing 16.0325152 60.76119 61.48763 46.116248
## loan 5.7819832 51.66248 50.95684 31.371918
## contact 2.7465809 40.79407 39.53964 18.154921
## duration 103.6381403 181.86567 181.39786 733.315701
## campaign 4.2998761 78.35196 76.82255 103.569182
## previous 26.7243314 87.39577 88.00116 107.582614
varImpPlot(rf.model)
duration is the most critical variable, as excluding it would lead to
the largest drop in accuracy, followed by age and balance (average of
the two plots). Similarly, duration is the most important variable when
it comes to contributing to the homogeneity of the nodes and leaves in
the decision trees.
The bottom 3 variables in both plots: default, contact, and loan are the least important when it comes to a drop in accuracy and the Gini.
Therefore, I’ll refit the model and omit default, contact, and loan.
new.train = train.data
# new training data without 'default', 'contact', and 'loan'.
new.train$default = NULL
new.train$contact = NULL
new.train$loan = NULL
new.test = test.data
# new test data without 'default', 'contact', and 'loan'.
new.test$default = NULL
new.test$contact = NULL
new.test$loan = NULL
# adjusted Random Forest model without default, contact, and loan
adj.rf.model = randomForest(y~., data = new.train, importance=TRUE)
Inspecting the adjusted random forest model
# Evaluate the model
print(adj.rf.model)
##
## Call:
## randomForest(formula = y ~ ., data = new.train, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 5.66%
## Confusion matrix:
## no yes class.error
## no 1642 196 0.106637650
## yes 12 1826 0.006528836
A lower OOB error rate indicates an improvement from the original model.
plot(adj.rf.model)
Similar story to the original model rf.model.
Let’s inspect the relationship between the most important variables and the probability of a yes to subscribing to the term deposits.
Starting with duration
# Inspecting the relationship of the last contact duration
partialPlot(adj.rf.model, new.train, x.var = "duration")
As the length of the last contact duration increases, the probability of a subscription to the term deposit decreases. Key message: keep marketing short and sweet to minimise a ‘no’ to the term deposit subscription.
How about age?
partialPlot(adj.rf.model, new.train, x.var = "age")
Upwards overall trend until just before 45, then there is a downwards trend up until 50. Moreover, from 50 to 60, there is an overall upwards trend with some dips in between. Just before age 60, there is a sharp downwards dip until just after age 60. Finally, there is a gradual rise post-60, then some indication of convergence. Overall, from ages 20 to 45, there is the highest probability of attaining a ‘yes’ while post-60 onwards is the lowest probability.
What about balance - average yearly balance?
# Inspecting the relationship of the last contact duration
partialPlot(adj.rf.model, new.train, x.var = "balance")
Looking from 0 onwards, there is an increasing trend as balance increases. In other words, the probability of a ‘yes’ to a subscription towards term deposits increases as balance increases.
Creating predictions and constructing the confusion matrix for the Random Forest model
# Creating predictions based on the fitted model and test data
predictions.rf = predict(adj.rf.model, newdata = new.test, type = "class")
conf_matrix.rf = confusionMatrix(predictions.rf, test.y) # Creating the confusion matrix for the Random Forest model
print(conf_matrix.rf$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.9149746 1.0000000 1.0000000
## Neg Pred Value Precision Recall
## 0.9216374 1.0000000 0.9149746
## F1 Prevalence Detection Rate
## 0.9555997 0.5000000 0.4574873
## Detection Prevalence Balanced Accuracy
## 0.4574873 0.9574873
Good statistics.
# Estimate of AUC
roc_curve = roc(test.y, as.numeric(predictions.rf)) # ROC curve
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
plot(roc_curve, grid=TRUE, col="orange",print.thres = "best") #Plot ROC curve alongside the point that maximises both sensitivity and specificity
What’s the AUC for the ROC curve?
print(auc(roc_curve))
## Area under the curve: 0.9575
The adjusted Random Forest model (adj.rf.model) is a great predictor of the binary classes. The model is good at distinguishing between a ‘yes’ or a ‘no’ in the response.
What about the error rate of the Random Forest model?
adj.rf.model$err.rate
## OOB no yes
## [1,] 0.11194030 0.2121685 0.020028612
## [2,] 0.10468320 0.1865672 0.025316456
## [3,] 0.10404412 0.1713235 0.036764706
## [4,] 0.09843546 0.1611863 0.034278181
## [5,] 0.09608972 0.1605311 0.031059683
## [6,] 0.09082995 0.1567034 0.024956471
## [7,] 0.08547976 0.1465761 0.024348811
## [8,] 0.08756274 0.1505017 0.024553571
## [9,] 0.08568270 0.1480865 0.023691460
## [10,] 0.08459215 0.1465565 0.023001095
## [11,] 0.08023001 0.1410538 0.019672131
## [12,] 0.07839388 0.1378556 0.019094381
## [13,] 0.07553859 0.1314785 0.019629226
## [14,] 0.07382185 0.1286805 0.019052803
## [15,] 0.07487068 0.1323529 0.017419706
## [16,] 0.07510204 0.1360174 0.014153511
## [17,] 0.07344940 0.1354733 0.011425462
## [18,] 0.07263330 0.1316649 0.013601741
## [19,] 0.06991295 0.1289445 0.010881393
## [20,] 0.06773667 0.1240479 0.011425462
## [21,] 0.06719260 0.1235038 0.010881393
## [22,] 0.06556039 0.1202394 0.010881393
## [23,] 0.06338411 0.1169750 0.009793254
## [24,] 0.06365615 0.1175190 0.009793254
## [25,] 0.06447225 0.1180631 0.010881393
## [26,] 0.06501632 0.1202394 0.009793254
## [27,] 0.06093580 0.1126224 0.009249184
## [28,] 0.06066376 0.1126224 0.008705114
## [29,] 0.06066376 0.1120783 0.009249184
## [30,] 0.06284004 0.1169750 0.008705114
## [31,] 0.06338411 0.1175190 0.009249184
## [32,] 0.05984766 0.1120783 0.007616975
## [33,] 0.06011970 0.1115343 0.008705114
## [34,] 0.06284004 0.1175190 0.008161045
## [35,] 0.06147987 0.1142546 0.008705114
## [36,] 0.06066376 0.1120783 0.009249184
## [37,] 0.06011970 0.1109902 0.009249184
## [38,] 0.05984766 0.1093580 0.010337323
## [39,] 0.05984766 0.1093580 0.010337323
## [40,] 0.06093580 0.1109902 0.010881393
## [41,] 0.06120783 0.1137106 0.008705114
## [42,] 0.05984766 0.1099021 0.009793254
## [43,] 0.05930359 0.1093580 0.009249184
## [44,] 0.06066376 0.1120783 0.009249184
## [45,] 0.05848749 0.1082699 0.008705114
## [46,] 0.05957563 0.1104461 0.008705114
## [47,] 0.06039173 0.1120783 0.008705114
## [48,] 0.05875952 0.1099021 0.007616975
## [49,] 0.06284004 0.1158868 0.009793254
## [50,] 0.06175190 0.1147987 0.008705114
## [51,] 0.06120783 0.1142546 0.008161045
## [52,] 0.06147987 0.1147987 0.008161045
## [53,] 0.06175190 0.1142546 0.009249184
## [54,] 0.06284004 0.1169750 0.008705114
## [55,] 0.06229597 0.1164309 0.008161045
## [56,] 0.06120783 0.1147987 0.007616975
## [57,] 0.06120783 0.1142546 0.008161045
## [58,] 0.06093580 0.1131665 0.008705114
## [59,] 0.06066376 0.1126224 0.008705114
## [60,] 0.06066376 0.1120783 0.009249184
## [61,] 0.05930359 0.1099021 0.008705114
## [62,] 0.05984766 0.1115343 0.008161045
## [63,] 0.05875952 0.1093580 0.008161045
## [64,] 0.05984766 0.1109902 0.008705114
## [65,] 0.05957563 0.1120783 0.007072905
## [66,] 0.06011970 0.1126224 0.007616975
## [67,] 0.06011970 0.1126224 0.007616975
## [68,] 0.06066376 0.1137106 0.007616975
## [69,] 0.05903156 0.1104461 0.007616975
## [70,] 0.05821545 0.1093580 0.007072905
## [71,] 0.05903156 0.1109902 0.007072905
## [72,] 0.05821545 0.1093580 0.007072905
## [73,] 0.05984766 0.1126224 0.007072905
## [74,] 0.05821545 0.1099021 0.006528836
## [75,] 0.05848749 0.1104461 0.006528836
## [76,] 0.05848749 0.1104461 0.006528836
## [77,] 0.05848749 0.1104461 0.006528836
## [78,] 0.05821545 0.1099021 0.006528836
## [79,] 0.05821545 0.1099021 0.006528836
## [80,] 0.05821545 0.1093580 0.007072905
## [81,] 0.05903156 0.1115343 0.006528836
## [82,] 0.05903156 0.1115343 0.006528836
## [83,] 0.05794342 0.1093580 0.006528836
## [84,] 0.05903156 0.1115343 0.006528836
## [85,] 0.05848749 0.1104461 0.006528836
## [86,] 0.05848749 0.1104461 0.006528836
## [87,] 0.05875952 0.1109902 0.006528836
## [88,] 0.05984766 0.1131665 0.006528836
## [89,] 0.05875952 0.1109902 0.006528836
## [90,] 0.05821545 0.1099021 0.006528836
## [91,] 0.05848749 0.1104461 0.006528836
## [92,] 0.05848749 0.1099021 0.007072905
## [93,] 0.05821545 0.1099021 0.006528836
## [94,] 0.05767138 0.1088139 0.006528836
## [95,] 0.05875952 0.1109902 0.006528836
## [96,] 0.05794342 0.1093580 0.006528836
## [97,] 0.05767138 0.1088139 0.006528836
## [98,] 0.05767138 0.1088139 0.006528836
## [99,] 0.05712731 0.1077258 0.006528836
## [100,] 0.05712731 0.1077258 0.006528836
## [101,] 0.05767138 0.1088139 0.006528836
## [102,] 0.05767138 0.1088139 0.006528836
## [103,] 0.05794342 0.1093580 0.006528836
## [104,] 0.05767138 0.1088139 0.006528836
## [105,] 0.05767138 0.1088139 0.006528836
## [106,] 0.05794342 0.1093580 0.006528836
## [107,] 0.05739935 0.1082699 0.006528836
## [108,] 0.05767138 0.1088139 0.006528836
## [109,] 0.05767138 0.1088139 0.006528836
## [110,] 0.05739935 0.1082699 0.006528836
## [111,] 0.05767138 0.1082699 0.007072905
## [112,] 0.05767138 0.1082699 0.007072905
## [113,] 0.05739935 0.1077258 0.007072905
## [114,] 0.05712731 0.1077258 0.006528836
## [115,] 0.05794342 0.1093580 0.006528836
## [116,] 0.05631121 0.1060936 0.006528836
## [117,] 0.05767138 0.1088139 0.006528836
## [118,] 0.05685528 0.1071817 0.006528836
## [119,] 0.05712731 0.1077258 0.006528836
## [120,] 0.05739935 0.1082699 0.006528836
## [121,] 0.05739935 0.1082699 0.006528836
## [122,] 0.05739935 0.1082699 0.006528836
## [123,] 0.05712731 0.1077258 0.006528836
## [124,] 0.05767138 0.1088139 0.006528836
## [125,] 0.05739935 0.1082699 0.006528836
## [126,] 0.05767138 0.1088139 0.006528836
## [127,] 0.05739935 0.1082699 0.006528836
## [128,] 0.05794342 0.1093580 0.006528836
## [129,] 0.05767138 0.1088139 0.006528836
## [130,] 0.05767138 0.1088139 0.006528836
## [131,] 0.05794342 0.1093580 0.006528836
## [132,] 0.05739935 0.1082699 0.006528836
## [133,] 0.05821545 0.1099021 0.006528836
## [134,] 0.05767138 0.1088139 0.006528836
## [135,] 0.05739935 0.1082699 0.006528836
## [136,] 0.05794342 0.1093580 0.006528836
## [137,] 0.05848749 0.1104461 0.006528836
## [138,] 0.05821545 0.1099021 0.006528836
## [139,] 0.05821545 0.1099021 0.006528836
## [140,] 0.05848749 0.1104461 0.006528836
## [141,] 0.05903156 0.1115343 0.006528836
## [142,] 0.05848749 0.1104461 0.006528836
## [143,] 0.05848749 0.1104461 0.006528836
## [144,] 0.05821545 0.1099021 0.006528836
## [145,] 0.05821545 0.1099021 0.006528836
## [146,] 0.05848749 0.1104461 0.006528836
## [147,] 0.05875952 0.1109902 0.006528836
## [148,] 0.05903156 0.1115343 0.006528836
## [149,] 0.05903156 0.1115343 0.006528836
## [150,] 0.05930359 0.1120783 0.006528836
## [151,] 0.05930359 0.1120783 0.006528836
## [152,] 0.05848749 0.1104461 0.006528836
## [153,] 0.05848749 0.1104461 0.006528836
## [154,] 0.05930359 0.1120783 0.006528836
## [155,] 0.05903156 0.1115343 0.006528836
## [156,] 0.05848749 0.1104461 0.006528836
## [157,] 0.05848749 0.1104461 0.006528836
## [158,] 0.05848749 0.1104461 0.006528836
## [159,] 0.05875952 0.1109902 0.006528836
## [160,] 0.05957563 0.1126224 0.006528836
## [161,] 0.05875952 0.1109902 0.006528836
## [162,] 0.05930359 0.1120783 0.006528836
## [163,] 0.05875952 0.1109902 0.006528836
## [164,] 0.05903156 0.1115343 0.006528836
## [165,] 0.05875952 0.1109902 0.006528836
## [166,] 0.05930359 0.1120783 0.006528836
## [167,] 0.05875952 0.1109902 0.006528836
## [168,] 0.05821545 0.1099021 0.006528836
## [169,] 0.05767138 0.1088139 0.006528836
## [170,] 0.05821545 0.1099021 0.006528836
## [171,] 0.05767138 0.1088139 0.006528836
## [172,] 0.05712731 0.1077258 0.006528836
## [173,] 0.05767138 0.1088139 0.006528836
## [174,] 0.05767138 0.1088139 0.006528836
## [175,] 0.05739935 0.1082699 0.006528836
## [176,] 0.05685528 0.1071817 0.006528836
## [177,] 0.05739935 0.1082699 0.006528836
## [178,] 0.05794342 0.1093580 0.006528836
## [179,] 0.05685528 0.1071817 0.006528836
## [180,] 0.05712731 0.1077258 0.006528836
## [181,] 0.05685528 0.1071817 0.006528836
## [182,] 0.05712731 0.1077258 0.006528836
## [183,] 0.05631121 0.1060936 0.006528836
## [184,] 0.05631121 0.1060936 0.006528836
## [185,] 0.05658324 0.1066376 0.006528836
## [186,] 0.05631121 0.1060936 0.006528836
## [187,] 0.05658324 0.1066376 0.006528836
## [188,] 0.05603917 0.1055495 0.006528836
## [189,] 0.05631121 0.1060936 0.006528836
## [190,] 0.05631121 0.1060936 0.006528836
## [191,] 0.05631121 0.1060936 0.006528836
## [192,] 0.05685528 0.1071817 0.006528836
## [193,] 0.05631121 0.1060936 0.006528836
## [194,] 0.05658324 0.1066376 0.006528836
## [195,] 0.05631121 0.1060936 0.006528836
## [196,] 0.05685528 0.1071817 0.006528836
## [197,] 0.05685528 0.1071817 0.006528836
## [198,] 0.05712731 0.1082699 0.005984766
## [199,] 0.05658324 0.1066376 0.006528836
## [200,] 0.05631121 0.1060936 0.006528836
## [201,] 0.05603917 0.1060936 0.005984766
## [202,] 0.05603917 0.1060936 0.005984766
## [203,] 0.05685528 0.1071817 0.006528836
## [204,] 0.05631121 0.1066376 0.005984766
## [205,] 0.05685528 0.1071817 0.006528836
## [206,] 0.05631121 0.1066376 0.005984766
## [207,] 0.05603917 0.1055495 0.006528836
## [208,] 0.05603917 0.1055495 0.006528836
## [209,] 0.05603917 0.1060936 0.005984766
## [210,] 0.05685528 0.1071817 0.006528836
## [211,] 0.05658324 0.1071817 0.005984766
## [212,] 0.05603917 0.1055495 0.006528836
## [213,] 0.05685528 0.1071817 0.006528836
## [214,] 0.05631121 0.1060936 0.006528836
## [215,] 0.05631121 0.1060936 0.006528836
## [216,] 0.05631121 0.1060936 0.006528836
## [217,] 0.05658324 0.1066376 0.006528836
## [218,] 0.05739935 0.1082699 0.006528836
## [219,] 0.05712731 0.1077258 0.006528836
## [220,] 0.05685528 0.1071817 0.006528836
## [221,] 0.05712731 0.1082699 0.005984766
## [222,] 0.05685528 0.1077258 0.005984766
## [223,] 0.05658324 0.1066376 0.006528836
## [224,] 0.05658324 0.1066376 0.006528836
## [225,] 0.05658324 0.1071817 0.005984766
## [226,] 0.05767138 0.1093580 0.005984766
## [227,] 0.05685528 0.1077258 0.005984766
## [228,] 0.05631121 0.1066376 0.005984766
## [229,] 0.05658324 0.1071817 0.005984766
## [230,] 0.05712731 0.1082699 0.005984766
## [231,] 0.05685528 0.1071817 0.006528836
## [232,] 0.05685528 0.1077258 0.005984766
## [233,] 0.05658324 0.1071817 0.005984766
## [234,] 0.05631121 0.1060936 0.006528836
## [235,] 0.05685528 0.1071817 0.006528836
## [236,] 0.05631121 0.1060936 0.006528836
## [237,] 0.05631121 0.1060936 0.006528836
## [238,] 0.05685528 0.1071817 0.006528836
## [239,] 0.05685528 0.1071817 0.006528836
## [240,] 0.05658324 0.1066376 0.006528836
## [241,] 0.05739935 0.1082699 0.006528836
## [242,] 0.05767138 0.1088139 0.006528836
## [243,] 0.05739935 0.1082699 0.006528836
## [244,] 0.05767138 0.1088139 0.006528836
## [245,] 0.05821545 0.1099021 0.006528836
## [246,] 0.05794342 0.1093580 0.006528836
## [247,] 0.05739935 0.1088139 0.005984766
## [248,] 0.05821545 0.1099021 0.006528836
## [249,] 0.05739935 0.1088139 0.005984766
## [250,] 0.05739935 0.1082699 0.006528836
## [251,] 0.05712731 0.1082699 0.005984766
## [252,] 0.05767138 0.1088139 0.006528836
## [253,] 0.05767138 0.1088139 0.006528836
## [254,] 0.05794342 0.1093580 0.006528836
## [255,] 0.05739935 0.1082699 0.006528836
## [256,] 0.05712731 0.1077258 0.006528836
## [257,] 0.05794342 0.1093580 0.006528836
## [258,] 0.05739935 0.1082699 0.006528836
## [259,] 0.05767138 0.1088139 0.006528836
## [260,] 0.05767138 0.1088139 0.006528836
## [261,] 0.05821545 0.1099021 0.006528836
## [262,] 0.05821545 0.1104461 0.005984766
## [263,] 0.05767138 0.1088139 0.006528836
## [264,] 0.05767138 0.1088139 0.006528836
## [265,] 0.05794342 0.1093580 0.006528836
## [266,] 0.05821545 0.1099021 0.006528836
## [267,] 0.05794342 0.1099021 0.005984766
## [268,] 0.05821545 0.1104461 0.005984766
## [269,] 0.05767138 0.1093580 0.005984766
## [270,] 0.05767138 0.1093580 0.005984766
## [271,] 0.05767138 0.1093580 0.005984766
## [272,] 0.05767138 0.1093580 0.005984766
## [273,] 0.05739935 0.1088139 0.005984766
## [274,] 0.05794342 0.1099021 0.005984766
## [275,] 0.05794342 0.1099021 0.005984766
## [276,] 0.05767138 0.1093580 0.005984766
## [277,] 0.05767138 0.1093580 0.005984766
## [278,] 0.05767138 0.1093580 0.005984766
## [279,] 0.05767138 0.1093580 0.005984766
## [280,] 0.05739935 0.1088139 0.005984766
## [281,] 0.05739935 0.1088139 0.005984766
## [282,] 0.05712731 0.1082699 0.005984766
## [283,] 0.05739935 0.1088139 0.005984766
## [284,] 0.05767138 0.1093580 0.005984766
## [285,] 0.05767138 0.1093580 0.005984766
## [286,] 0.05739935 0.1088139 0.005984766
## [287,] 0.05739935 0.1088139 0.005984766
## [288,] 0.05767138 0.1093580 0.005984766
## [289,] 0.05631121 0.1066376 0.005984766
## [290,] 0.05739935 0.1088139 0.005984766
## [291,] 0.05712731 0.1082699 0.005984766
## [292,] 0.05739935 0.1088139 0.005984766
## [293,] 0.05739935 0.1088139 0.005984766
## [294,] 0.05767138 0.1093580 0.005984766
## [295,] 0.05685528 0.1077258 0.005984766
## [296,] 0.05685528 0.1077258 0.005984766
## [297,] 0.05712731 0.1082699 0.005984766
## [298,] 0.05767138 0.1088139 0.006528836
## [299,] 0.05739935 0.1088139 0.005984766
## [300,] 0.05739935 0.1088139 0.005984766
## [301,] 0.05739935 0.1082699 0.006528836
## [302,] 0.05739935 0.1082699 0.006528836
## [303,] 0.05794342 0.1093580 0.006528836
## [304,] 0.05739935 0.1082699 0.006528836
## [305,] 0.05767138 0.1088139 0.006528836
## [306,] 0.05794342 0.1093580 0.006528836
## [307,] 0.05712731 0.1077258 0.006528836
## [308,] 0.05767138 0.1088139 0.006528836
## [309,] 0.05767138 0.1088139 0.006528836
## [310,] 0.05739935 0.1082699 0.006528836
## [311,] 0.05794342 0.1093580 0.006528836
## [312,] 0.05794342 0.1093580 0.006528836
## [313,] 0.05794342 0.1093580 0.006528836
## [314,] 0.05794342 0.1099021 0.005984766
## [315,] 0.05712731 0.1082699 0.005984766
## [316,] 0.05739935 0.1088139 0.005984766
## [317,] 0.05767138 0.1093580 0.005984766
## [318,] 0.05712731 0.1082699 0.005984766
## [319,] 0.05712731 0.1082699 0.005984766
## [320,] 0.05739935 0.1088139 0.005984766
## [321,] 0.05739935 0.1088139 0.005984766
## [322,] 0.05712731 0.1082699 0.005984766
## [323,] 0.05712731 0.1082699 0.005984766
## [324,] 0.05712731 0.1082699 0.005984766
## [325,] 0.05794342 0.1093580 0.006528836
## [326,] 0.05767138 0.1093580 0.005984766
## [327,] 0.05821545 0.1099021 0.006528836
## [328,] 0.05794342 0.1093580 0.006528836
## [329,] 0.05739935 0.1082699 0.006528836
## [330,] 0.05767138 0.1088139 0.006528836
## [331,] 0.05767138 0.1088139 0.006528836
## [332,] 0.05767138 0.1088139 0.006528836
## [333,] 0.05739935 0.1082699 0.006528836
## [334,] 0.05767138 0.1088139 0.006528836
## [335,] 0.05767138 0.1088139 0.006528836
## [336,] 0.05767138 0.1088139 0.006528836
## [337,] 0.05767138 0.1088139 0.006528836
## [338,] 0.05767138 0.1088139 0.006528836
## [339,] 0.05821545 0.1099021 0.006528836
## [340,] 0.05767138 0.1088139 0.006528836
## [341,] 0.05739935 0.1082699 0.006528836
## [342,] 0.05794342 0.1093580 0.006528836
## [343,] 0.05794342 0.1093580 0.006528836
## [344,] 0.05767138 0.1088139 0.006528836
## [345,] 0.05739935 0.1082699 0.006528836
## [346,] 0.05739935 0.1082699 0.006528836
## [347,] 0.05767138 0.1088139 0.006528836
## [348,] 0.05739935 0.1082699 0.006528836
## [349,] 0.05767138 0.1088139 0.006528836
## [350,] 0.05739935 0.1082699 0.006528836
## [351,] 0.05767138 0.1088139 0.006528836
## [352,] 0.05767138 0.1088139 0.006528836
## [353,] 0.05767138 0.1088139 0.006528836
## [354,] 0.05739935 0.1082699 0.006528836
## [355,] 0.05767138 0.1088139 0.006528836
## [356,] 0.05712731 0.1077258 0.006528836
## [357,] 0.05685528 0.1071817 0.006528836
## [358,] 0.05685528 0.1071817 0.006528836
## [359,] 0.05712731 0.1077258 0.006528836
## [360,] 0.05712731 0.1077258 0.006528836
## [361,] 0.05712731 0.1077258 0.006528836
## [362,] 0.05767138 0.1088139 0.006528836
## [363,] 0.05685528 0.1071817 0.006528836
## [364,] 0.05658324 0.1066376 0.006528836
## [365,] 0.05658324 0.1066376 0.006528836
## [366,] 0.05685528 0.1071817 0.006528836
## [367,] 0.05658324 0.1066376 0.006528836
## [368,] 0.05658324 0.1066376 0.006528836
## [369,] 0.05658324 0.1066376 0.006528836
## [370,] 0.05658324 0.1066376 0.006528836
## [371,] 0.05631121 0.1060936 0.006528836
## [372,] 0.05631121 0.1060936 0.006528836
## [373,] 0.05658324 0.1066376 0.006528836
## [374,] 0.05631121 0.1060936 0.006528836
## [375,] 0.05658324 0.1066376 0.006528836
## [376,] 0.05631121 0.1060936 0.006528836
## [377,] 0.05658324 0.1066376 0.006528836
## [378,] 0.05658324 0.1066376 0.006528836
## [379,] 0.05685528 0.1071817 0.006528836
## [380,] 0.05631121 0.1060936 0.006528836
## [381,] 0.05631121 0.1060936 0.006528836
## [382,] 0.05658324 0.1066376 0.006528836
## [383,] 0.05658324 0.1066376 0.006528836
## [384,] 0.05712731 0.1077258 0.006528836
## [385,] 0.05631121 0.1060936 0.006528836
## [386,] 0.05658324 0.1066376 0.006528836
## [387,] 0.05685528 0.1071817 0.006528836
## [388,] 0.05685528 0.1071817 0.006528836
## [389,] 0.05631121 0.1060936 0.006528836
## [390,] 0.05739935 0.1082699 0.006528836
## [391,] 0.05712731 0.1077258 0.006528836
## [392,] 0.05712731 0.1077258 0.006528836
## [393,] 0.05685528 0.1071817 0.006528836
## [394,] 0.05658324 0.1066376 0.006528836
## [395,] 0.05685528 0.1071817 0.006528836
## [396,] 0.05658324 0.1066376 0.006528836
## [397,] 0.05658324 0.1066376 0.006528836
## [398,] 0.05631121 0.1060936 0.006528836
## [399,] 0.05685528 0.1071817 0.006528836
## [400,] 0.05685528 0.1071817 0.006528836
## [401,] 0.05658324 0.1066376 0.006528836
## [402,] 0.05658324 0.1066376 0.006528836
## [403,] 0.05685528 0.1071817 0.006528836
## [404,] 0.05685528 0.1071817 0.006528836
## [405,] 0.05712731 0.1077258 0.006528836
## [406,] 0.05712731 0.1077258 0.006528836
## [407,] 0.05712731 0.1077258 0.006528836
## [408,] 0.05712731 0.1077258 0.006528836
## [409,] 0.05739935 0.1082699 0.006528836
## [410,] 0.05712731 0.1077258 0.006528836
## [411,] 0.05739935 0.1082699 0.006528836
## [412,] 0.05712731 0.1077258 0.006528836
## [413,] 0.05685528 0.1071817 0.006528836
## [414,] 0.05712731 0.1077258 0.006528836
## [415,] 0.05712731 0.1077258 0.006528836
## [416,] 0.05767138 0.1088139 0.006528836
## [417,] 0.05739935 0.1082699 0.006528836
## [418,] 0.05767138 0.1088139 0.006528836
## [419,] 0.05767138 0.1088139 0.006528836
## [420,] 0.05685528 0.1071817 0.006528836
## [421,] 0.05712731 0.1077258 0.006528836
## [422,] 0.05658324 0.1066376 0.006528836
## [423,] 0.05685528 0.1071817 0.006528836
## [424,] 0.05739935 0.1082699 0.006528836
## [425,] 0.05685528 0.1071817 0.006528836
## [426,] 0.05712731 0.1077258 0.006528836
## [427,] 0.05685528 0.1071817 0.006528836
## [428,] 0.05685528 0.1071817 0.006528836
## [429,] 0.05712731 0.1077258 0.006528836
## [430,] 0.05685528 0.1071817 0.006528836
## [431,] 0.05631121 0.1060936 0.006528836
## [432,] 0.05658324 0.1066376 0.006528836
## [433,] 0.05685528 0.1071817 0.006528836
## [434,] 0.05658324 0.1066376 0.006528836
## [435,] 0.05631121 0.1060936 0.006528836
## [436,] 0.05631121 0.1060936 0.006528836
## [437,] 0.05658324 0.1066376 0.006528836
## [438,] 0.05631121 0.1060936 0.006528836
## [439,] 0.05603917 0.1055495 0.006528836
## [440,] 0.05603917 0.1055495 0.006528836
## [441,] 0.05603917 0.1055495 0.006528836
## [442,] 0.05576714 0.1050054 0.006528836
## [443,] 0.05576714 0.1050054 0.006528836
## [444,] 0.05549510 0.1044614 0.006528836
## [445,] 0.05603917 0.1055495 0.006528836
## [446,] 0.05576714 0.1050054 0.006528836
## [447,] 0.05631121 0.1060936 0.006528836
## [448,] 0.05685528 0.1071817 0.006528836
## [449,] 0.05603917 0.1055495 0.006528836
## [450,] 0.05712731 0.1077258 0.006528836
## [451,] 0.05685528 0.1071817 0.006528836
## [452,] 0.05658324 0.1066376 0.006528836
## [453,] 0.05712731 0.1077258 0.006528836
## [454,] 0.05739935 0.1082699 0.006528836
## [455,] 0.05712731 0.1077258 0.006528836
## [456,] 0.05712731 0.1077258 0.006528836
## [457,] 0.05658324 0.1066376 0.006528836
## [458,] 0.05631121 0.1060936 0.006528836
## [459,] 0.05603917 0.1055495 0.006528836
## [460,] 0.05685528 0.1071817 0.006528836
## [461,] 0.05631121 0.1060936 0.006528836
## [462,] 0.05658324 0.1066376 0.006528836
## [463,] 0.05685528 0.1071817 0.006528836
## [464,] 0.05685528 0.1071817 0.006528836
## [465,] 0.05603917 0.1055495 0.006528836
## [466,] 0.05685528 0.1071817 0.006528836
## [467,] 0.05576714 0.1050054 0.006528836
## [468,] 0.05549510 0.1044614 0.006528836
## [469,] 0.05576714 0.1050054 0.006528836
## [470,] 0.05549510 0.1044614 0.006528836
## [471,] 0.05576714 0.1050054 0.006528836
## [472,] 0.05522307 0.1039173 0.006528836
## [473,] 0.05495103 0.1033732 0.006528836
## [474,] 0.05576714 0.1050054 0.006528836
## [475,] 0.05576714 0.1050054 0.006528836
## [476,] 0.05576714 0.1050054 0.006528836
## [477,] 0.05576714 0.1050054 0.006528836
## [478,] 0.05631121 0.1060936 0.006528836
## [479,] 0.05603917 0.1055495 0.006528836
## [480,] 0.05576714 0.1050054 0.006528836
## [481,] 0.05603917 0.1055495 0.006528836
## [482,] 0.05576714 0.1050054 0.006528836
## [483,] 0.05603917 0.1055495 0.006528836
## [484,] 0.05603917 0.1055495 0.006528836
## [485,] 0.05631121 0.1060936 0.006528836
## [486,] 0.05576714 0.1050054 0.006528836
## [487,] 0.05603917 0.1055495 0.006528836
## [488,] 0.05603917 0.1055495 0.006528836
## [489,] 0.05603917 0.1055495 0.006528836
## [490,] 0.05603917 0.1055495 0.006528836
## [491,] 0.05603917 0.1055495 0.006528836
## [492,] 0.05603917 0.1055495 0.006528836
## [493,] 0.05631121 0.1060936 0.006528836
## [494,] 0.05685528 0.1071817 0.006528836
## [495,] 0.05658324 0.1066376 0.006528836
## [496,] 0.05658324 0.1066376 0.006528836
## [497,] 0.05712731 0.1077258 0.006528836
## [498,] 0.05631121 0.1060936 0.006528836
## [499,] 0.05658324 0.1066376 0.006528836
## [500,] 0.05658324 0.1066376 0.006528836
As more trees are added to the model, the OOB error decreases until around 0.05.
# Final model
print(adj.rf.model)
##
## Call:
## randomForest(formula = y ~ ., data = new.train, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 5.66%
## Confusion matrix:
## no yes class.error
## no 1642 196 0.106637650
## yes 12 1826 0.006528836
The most important features of the marketing campaign is duration - last contact duration, in seconds, age, and balance - average yearly balance.
As per the PDP, the probability of a yes to the subscription to the term deposit decreases as the last contact duration increases. Continuing on the topic of PDP, with regard to ages; from ages 20 to 45, there is the highest probability of attaining a ‘yes’ while post-60 onwards is the lowest probability. There is an increasing trend as balance increases. In other words, the probability of a ‘yes’ to a subscription towards term deposits increases as balance increases.
Therefore there are 3 key takeaways from the PDP plots: - Keep the duration of the phone call short and sweet to promote likelihood of a ‘yes’ - Target ages groups 20 to 45 years old, and minimise efforts to post-60 year old clients - Call clients with sufficient and stable balances; Established clients should be prioritised.
Saving the model into a file for access
saveRDS(adj.rf.model, "bankmarketing_rf.rds")