A Portuguese bank conducted a marketing campaign (phone calls) to predict if a client will subscribe to a term deposit The records of their efforts are available in the form of a dataset. The objective here is to apply machine learning techniques to analyze the dataset and figure out most effective tactics that will help the bank in next campaign to persuade more customers to subscribe to the bank’s term deposit.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv('https://raw.githubusercontent.com/ddebonis47/classwork/refs/heads/main/bank-full.csv', sep = ';', stringsAsFactors = TRUE)
str(data)
## 'data.frame': 45211 obs. of 17 variables:
## $ age : int 58 44 33 47 33 35 28 42 58 43 ...
## $ job : Factor w/ 12 levels "admin.","blue-collar",..: 5 10 3 2 12 5 5 3 6 10 ...
## $ marital : Factor w/ 3 levels "divorced","married",..: 2 3 2 2 3 2 3 1 2 3 ...
## $ education: Factor w/ 4 levels "primary","secondary",..: 3 2 2 4 4 3 3 3 1 2 ...
## $ default : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ balance : int 2143 29 2 1506 1 231 447 2 121 593 ...
## $ housing : Factor w/ 2 levels "no","yes": 2 2 2 2 1 2 2 2 2 2 ...
## $ loan : Factor w/ 2 levels "no","yes": 1 1 2 1 1 1 2 1 1 1 ...
## $ contact : Factor w/ 3 levels "cellular","telephone",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ day : int 5 5 5 5 5 5 5 5 5 5 ...
## $ month : Factor w/ 12 levels "apr","aug","dec",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ duration : int 261 151 76 92 198 139 217 380 50 55 ...
## $ campaign : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : int -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ previous : int 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcome : Factor w/ 4 levels "failure","other",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ y : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
summary(data)
## age job marital education
## Min. :18.00 blue-collar:9732 divorced: 5207 primary : 6851
## 1st Qu.:33.00 management :9458 married :27214 secondary:23202
## Median :39.00 technician :7597 single :12790 tertiary :13301
## Mean :40.94 admin. :5171 unknown : 1857
## 3rd Qu.:48.00 services :4154
## Max. :95.00 retired :2264
## (Other) :6835
## default balance housing loan contact
## no :44396 Min. : -8019 no :20081 no :37967 cellular :29285
## yes: 815 1st Qu.: 72 yes:25130 yes: 7244 telephone: 2906
## Median : 448 unknown :13020
## Mean : 1362
## 3rd Qu.: 1428
## Max. :102127
##
## day month duration campaign
## Min. : 1.00 may :13766 Min. : 0.0 Min. : 1.000
## 1st Qu.: 8.00 jul : 6895 1st Qu.: 103.0 1st Qu.: 1.000
## Median :16.00 aug : 6247 Median : 180.0 Median : 2.000
## Mean :15.81 jun : 5341 Mean : 258.2 Mean : 2.764
## 3rd Qu.:21.00 nov : 3970 3rd Qu.: 319.0 3rd Qu.: 3.000
## Max. :31.00 apr : 2932 Max. :4918.0 Max. :63.000
## (Other): 6060
## pdays previous poutcome y
## Min. : -1.0 Min. : 0.0000 failure: 4901 no :39922
## 1st Qu.: -1.0 1st Qu.: 0.0000 other : 1840 yes: 5289
## Median : -1.0 Median : 0.0000 success: 1511
## Mean : 40.2 Mean : 0.5803 unknown:36959
## 3rd Qu.: -1.0 3rd Qu.: 0.0000
## Max. :871.0 Max. :275.0000
##
numeric_vars <- data|>
select(where(is.numeric))
# Histograms
numeric_vars %>%
gather(key="variable", value="value") %>%
ggplot(aes(x=value)) +
geom_histogram(bins=30, fill="skyblue", color="black") +
facet_wrap(~variable, scales="free") +
theme_minimal()
cat_vars <- names(data)[sapply(data, is.factor)]
for (v in cat_vars) {
df <- data %>%
count(.data[[v]]) %>%
mutate(percent = n / sum(n) * 100)
print(
ggplot(df, aes(x=.data[[v]], y=percent)) +
geom_col(fill="steelblue") +
theme_minimal() +
labs(y="Percent", x=v) +
theme(axis.text.x = element_text(angle=45, hjust=1))
)
}
# need to scale to adjust for different ranges
numeric_scaled <- scale(numeric_vars)
numeric_scaled <- as.data.frame(numeric_scaled)
numeric_scaled_long <- numeric_scaled |>
pivot_longer(cols = everything(), names_to = "variable", values_to = "value")
ggplot(numeric_scaled_long, aes(x=variable, y=value)) +
geom_boxplot(fill="lightgreen") +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, hjust=1))
median_prev <- median(data$previous, na.rm = TRUE)
data$previous <- ifelse(data$previous == 275, median_prev, data$previous)
summary(data)
## age job marital education
## Min. :18.00 blue-collar:9732 divorced: 5207 primary : 6851
## 1st Qu.:33.00 management :9458 married :27214 secondary:23202
## Median :39.00 technician :7597 single :12790 tertiary :13301
## Mean :40.94 admin. :5171 unknown : 1857
## 3rd Qu.:48.00 services :4154
## Max. :95.00 retired :2264
## (Other) :6835
## default balance housing loan contact
## no :44396 Min. : -8019 no :20081 no :37967 cellular :29285
## yes: 815 1st Qu.: 72 yes:25130 yes: 7244 telephone: 2906
## Median : 448 unknown :13020
## Mean : 1362
## 3rd Qu.: 1428
## Max. :102127
##
## day month duration campaign
## Min. : 1.00 may :13766 Min. : 0.0 Min. : 1.000
## 1st Qu.: 8.00 jul : 6895 1st Qu.: 103.0 1st Qu.: 1.000
## Median :16.00 aug : 6247 Median : 180.0 Median : 2.000
## Mean :15.81 jun : 5341 Mean : 258.2 Mean : 2.764
## 3rd Qu.:21.00 nov : 3970 3rd Qu.: 319.0 3rd Qu.: 3.000
## Max. :31.00 apr : 2932 Max. :4918.0 Max. :63.000
## (Other): 6060
## pdays previous poutcome y
## Min. : -1.0 Min. : 0.0000 failure: 4901 no :39922
## 1st Qu.: -1.0 1st Qu.: 0.0000 other : 1840 yes: 5289
## Median : -1.0 Median : 0.0000 success: 1511
## Mean : 40.2 Mean : 0.5742 unknown:36959
## 3rd Qu.: -1.0 3rd Qu.: 0.0000
## Max. :871.0 Max. :58.0000
##
sapply(data, function(x) sum(is.na(x) | x=="unknown"))
## age job marital education default balance housing loan
## 0 288 0 1857 0 0 0 0
## contact day month duration campaign pdays previous poutcome
## 13020 0 0 0 0 0 0 36959
## y
## 0
data <- data |>
mutate(pdays=na_if(pdays, -1))
summary(data)
## age job marital education
## Min. :18.00 blue-collar:9732 divorced: 5207 primary : 6851
## 1st Qu.:33.00 management :9458 married :27214 secondary:23202
## Median :39.00 technician :7597 single :12790 tertiary :13301
## Mean :40.94 admin. :5171 unknown : 1857
## 3rd Qu.:48.00 services :4154
## Max. :95.00 retired :2264
## (Other) :6835
## default balance housing loan contact
## no :44396 Min. : -8019 no :20081 no :37967 cellular :29285
## yes: 815 1st Qu.: 72 yes:25130 yes: 7244 telephone: 2906
## Median : 448 unknown :13020
## Mean : 1362
## 3rd Qu.: 1428
## Max. :102127
##
## day month duration campaign
## Min. : 1.00 may :13766 Min. : 0.0 Min. : 1.000
## 1st Qu.: 8.00 jul : 6895 1st Qu.: 103.0 1st Qu.: 1.000
## Median :16.00 aug : 6247 Median : 180.0 Median : 2.000
## Mean :15.81 jun : 5341 Mean : 258.2 Mean : 2.764
## 3rd Qu.:21.00 nov : 3970 3rd Qu.: 319.0 3rd Qu.: 3.000
## Max. :31.00 apr : 2932 Max. :4918.0 Max. :63.000
## (Other): 6060
## pdays previous poutcome y
## Min. : 1.0 Min. : 0.0000 failure: 4901 no :39922
## 1st Qu.:133.0 1st Qu.: 0.0000 other : 1840 yes: 5289
## Median :194.0 Median : 0.0000 success: 1511
## Mean :224.6 Mean : 0.5742 unknown:36959
## 3rd Qu.:327.0 3rd Qu.: 0.0000
## Max. :871.0 Max. :58.0000
## NA's :36954
numeric_vars <- data|>
select(where(is.numeric))
numeric_scaled <- scale(numeric_vars)
numeric_scaled <- as.data.frame(numeric_scaled)
numeric_scaled_long <- numeric_scaled |>
pivot_longer(cols = everything(), names_to = "variable", values_to = "value")
ggplot(numeric_scaled_long, aes(x=variable, y=value)) +
geom_boxplot(fill="lightgreen") +
theme_minimal() +
theme(axis.text.x = element_text(angle=45, hjust=1))
## Warning: Removed 36954 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
for (v in names(numeric_vars)) {
print(
ggplot(data, aes_string(x="y", y=v)) +
geom_boxplot(fill="lightgreen") +
theme_minimal()
)
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 36954 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
for (v in cat_vars) {
df <- data %>%
group_by(across(all_of(c(v, "y")))) %>% # group by v and y
summarise(n = n(), .groups = "drop") %>%
group_by(across(all_of(v))) %>% # group by v only for percent
mutate(percent = n / sum(n) * 100)
print(df)
print(
ggplot(df, aes(x = .data[[v]], y = percent, fill = y)) +
geom_col() +
theme_minimal() +
labs(y = "Percent", x = v) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
)
}
## # A tibble: 24 × 4
## # Groups: job [12]
## job y n percent
## <fct> <fct> <int> <dbl>
## 1 admin. no 4540 87.8
## 2 admin. yes 631 12.2
## 3 blue-collar no 9024 92.7
## 4 blue-collar yes 708 7.27
## 5 entrepreneur no 1364 91.7
## 6 entrepreneur yes 123 8.27
## 7 housemaid no 1131 91.2
## 8 housemaid yes 109 8.79
## 9 management no 8157 86.2
## 10 management yes 1301 13.8
## # ℹ 14 more rows
## # A tibble: 6 × 4
## # Groups: marital [3]
## marital y n percent
## <fct> <fct> <int> <dbl>
## 1 divorced no 4585 88.1
## 2 divorced yes 622 11.9
## 3 married no 24459 89.9
## 4 married yes 2755 10.1
## 5 single no 10878 85.1
## 6 single yes 1912 14.9
## # A tibble: 8 × 4
## # Groups: education [4]
## education y n percent
## <fct> <fct> <int> <dbl>
## 1 primary no 6260 91.4
## 2 primary yes 591 8.63
## 3 secondary no 20752 89.4
## 4 secondary yes 2450 10.6
## 5 tertiary no 11305 85.0
## 6 tertiary yes 1996 15.0
## 7 unknown no 1605 86.4
## 8 unknown yes 252 13.6
## # A tibble: 4 × 4
## # Groups: default [2]
## default y n percent
## <fct> <fct> <int> <dbl>
## 1 no no 39159 88.2
## 2 no yes 5237 11.8
## 3 yes no 763 93.6
## 4 yes yes 52 6.38
## # A tibble: 4 × 4
## # Groups: housing [2]
## housing y n percent
## <fct> <fct> <int> <dbl>
## 1 no no 16727 83.3
## 2 no yes 3354 16.7
## 3 yes no 23195 92.3
## 4 yes yes 1935 7.70
## # A tibble: 4 × 4
## # Groups: loan [2]
## loan y n percent
## <fct> <fct> <int> <dbl>
## 1 no no 33162 87.3
## 2 no yes 4805 12.7
## 3 yes no 6760 93.3
## 4 yes yes 484 6.68
## # A tibble: 6 × 4
## # Groups: contact [3]
## contact y n percent
## <fct> <fct> <int> <dbl>
## 1 cellular no 24916 85.1
## 2 cellular yes 4369 14.9
## 3 telephone no 2516 86.6
## 4 telephone yes 390 13.4
## 5 unknown no 12490 95.9
## 6 unknown yes 530 4.07
## # A tibble: 24 × 4
## # Groups: month [12]
## month y n percent
## <fct> <fct> <int> <dbl>
## 1 apr no 2355 80.3
## 2 apr yes 577 19.7
## 3 aug no 5559 89.0
## 4 aug yes 688 11.0
## 5 dec no 114 53.3
## 6 dec yes 100 46.7
## 7 feb no 2208 83.4
## 8 feb yes 441 16.6
## 9 jan no 1261 89.9
## 10 jan yes 142 10.1
## # ℹ 14 more rows
## # A tibble: 8 × 4
## # Groups: poutcome [4]
## poutcome y n percent
## <fct> <fct> <int> <dbl>
## 1 failure no 4283 87.4
## 2 failure yes 618 12.6
## 3 other no 1533 83.3
## 4 other yes 307 16.7
## 5 success no 533 35.3
## 6 success yes 978 64.7
## 7 unknown no 33573 90.8
## 8 unknown yes 3386 9.16
## # A tibble: 2 × 3
## # Groups: y [2]
## y n percent
## <fct> <int> <dbl>
## 1 no 39922 100
## 2 yes 5289 100
# Using dlookr to get correlation/association
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
## method from
## plot.transform scales
## print.transform scales
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:base':
##
## transform
library(corrplot)
## corrplot 0.95 loaded
# Correlation / association
assoc_matrix <- correlate(data)
print(assoc_matrix)
## # A tibble: 42 × 3
## var1 var2 coef_corr
## <fct> <fct> <dbl>
## 1 balance age 0.0978
## 2 day age -0.00912
## 3 duration age -0.00465
## 4 campaign age 0.00476
## 5 pdays age -0.108
## 6 previous age 0.00184
## 7 age balance 0.0978
## 8 day balance 0.00450
## 9 duration balance 0.0216
## 10 campaign balance -0.0146
## # ℹ 32 more rows
# Or for numeric-only correlation
num_cor <- cor(numeric_vars, use="pairwise.complete.obs")
corrplot(num_cor, method="color", addCoef.col="black")
data$y_num <- ifelse(data$y == "yes", 1, 0)
numeric_vars <- data |>
select(where(is.numeric))
cor_matrix <- cor(numeric_vars, data$y_num, use = "pairwise.complete.obs")
cor_matrix
## [,1]
## age 0.02515502
## balance 0.05283841
## day -0.02834778
## duration 0.39452102
## campaign -0.07317201
## pdays -0.15220590
## previous 0.11372511
## y_num 1.00000000
library(DescTools)
cat_vars <- names(data)[sapply(data, is.factor)]
safeCramerV <- function(x, y) {
complete <- complete.cases(x, y)
x <- x[complete]
y <- y[complete]
if (length(x) == 0 || length(y) == 0) {
return(NA) # nothing to compute
}
return(CramerV(x, y))
}
assoc_y <- sapply(cat_vars, function(v){
safeCramerV(data[[v]], data$y)
})
assoc_y
## job marital education default housing loan contact
## 0.13599047 0.06592570 0.07269548 0.02241897 0.13917270 0.06818503 0.15135540
## month poutcome y
## 0.26023704 0.31166262 1.00000000
pairwise_assoc <- function(df) {
vars <- names(df)
result <- data.frame(var1=character(), var2=character(), association=numeric(), stringsAsFactors=FALSE)
for(i in seq_along(vars)) {
for(j in seq_along(vars)) {
v1 <- df[[vars[i]]]
v2 <- df[[vars[j]]]
# Both numeric → Pearson
if(is.numeric(v1) & is.numeric(v2)) {
assoc <- cor(v1, v2, use="pairwise.complete.obs")
}
# Both categorical → Cramer’s V
else if(is.factor(v1) & is.factor(v2)) {
assoc <- CramerV(v1, v2)
}
# One numeric, one categorical → point-biserial (numeric vs binary)
else if(is.numeric(v1) & is.factor(v2) & length(levels(v2))==2) {
assoc <- cor(v1, as.numeric(v2)-1, use="pairwise.complete.obs")
}
else if(is.factor(v1) & length(levels(v1))==2 & is.numeric(v2)) {
assoc <- cor(as.numeric(v1)-1, v2, use="pairwise.complete.obs")
}
# Otherwise → NA (e.g., categorical with >2 levels vs numeric)
else {
assoc <- NA
}
result <- rbind(result, data.frame(var1=vars[i], var2=vars[j], association=assoc))
}
}
return(result)
}
assoc_table <- pairwise_assoc(data)
head(assoc_table, 20) # preview first 20 rows
## var1 var2 association
## 1 age age 1.000000000
## 2 age job NA
## 3 age marital NA
## 4 age education NA
## 5 age default -0.017879304
## 6 age balance 0.097782739
## 7 age housing -0.185513082
## 8 age loan -0.015655273
## 9 age contact NA
## 10 age day -0.009120046
## 11 age month NA
## 12 age duration -0.004648428
## 13 age campaign 0.004760312
## 14 age pdays -0.107862882
## 15 age previous 0.001836490
## 16 age poutcome NA
## 17 age y 0.025155017
## 18 age y_num 0.025155017
## 19 job age NA
## 20 job job 1.000000000
assoc_table %>%
filter(!is.na(association)) %>%
arrange(desc(abs(association)))
## var1 var2 association
## 1 age age 1.0000000000
## 2 job job 1.0000000000
## 3 marital marital 1.0000000000
## 4 education education 1.0000000000
## 5 default default 1.0000000000
## 6 balance balance 1.0000000000
## 7 housing housing 1.0000000000
## 8 loan loan 1.0000000000
## 9 contact contact 1.0000000000
## 10 day day 1.0000000000
## 11 month month 1.0000000000
## 12 duration duration 1.0000000000
## 13 campaign campaign 1.0000000000
## 14 pdays pdays 1.0000000000
## 15 previous previous 1.0000000000
## 16 poutcome poutcome 1.0000000000
## 17 y y 1.0000000000
## 18 y y_num 1.0000000000
## 19 y_num y 1.0000000000
## 20 y_num y_num 1.0000000000
## 21 contact month 0.5121267993
## 22 month contact 0.5121267993
## 23 housing month 0.5042128413
## 24 month housing 0.5042128413
## 25 job education 0.4582592303
## 26 education job 0.4582592303
## 27 duration y 0.3945210159
## 28 duration y_num 0.3945210159
## 29 y duration 0.3945210159
## 30 y_num duration 0.3945210159
## 31 housing pdays 0.3351238860
## 32 pdays housing 0.3351238860
## 33 poutcome y 0.3116626168
## 34 y poutcome 0.3116626168
## 35 job housing 0.2817399221
## 36 housing job 0.2817399221
## 37 month y 0.2602370423
## 38 y month 0.2602370423
## 39 month poutcome 0.2143362828
## 40 poutcome month 0.2143362828
## 41 housing contact 0.2135850795
## 42 contact housing 0.2135850795
## 43 contact poutcome 0.2074712311
## 44 poutcome contact 0.2074712311
## 45 job marital 0.2060122042
## 46 marital job 0.2060122042
## 47 age housing -0.1855130815
## 48 housing age -0.1855130815
## 49 loan month 0.1828265550
## 50 month loan 0.1828265550
## 51 day campaign 0.1624902163
## 52 campaign day 0.1624902163
## 53 pdays y -0.1522058950
## 54 pdays y_num -0.1522058950
## 55 y pdays -0.1522058950
## 56 y_num pdays -0.1522058950
## 57 contact y 0.1513553979
## 58 y contact 0.1513553979
## 59 job contact 0.1504651748
## 60 contact job 0.1504651748
## 61 housing poutcome 0.1431471729
## 62 poutcome housing 0.1431471729
## 63 housing y 0.1391727025
## 64 y housing 0.1391727025
## 65 housing y_num -0.1391727025
## 66 y_num housing -0.1391727025
## 67 job y 0.1359904718
## 68 y job 0.1359904718
## 69 education contact 0.1227949246
## 70 contact education 0.1227949246
## 71 marital education 0.1216218048
## 72 education marital 0.1216218048
## 73 education housing 0.1193392732
## 74 housing education 0.1193392732
## 75 previous y 0.1137251122
## 76 previous y_num 0.1137251122
## 77 y previous 0.1137251122
## 78 y_num previous 0.1137251122
## 79 job month 0.1102399554
## 80 month job 0.1102399554
## 81 education month 0.1101048520
## 82 month education 0.1101048520
## 83 balance pdays -0.1081221245
## 84 pdays balance -0.1081221245
## 85 age pdays -0.1078628819
## 86 pdays age -0.1078628819
## 87 job loan 0.1065016639
## 88 loan job 0.1065016639
## 89 age balance 0.0977827394
## 90 balance age 0.0977827394
## 91 day pdays -0.0900946441
## 92 pdays day -0.0900946441
## 93 duration campaign -0.0845695027
## 94 campaign duration -0.0845695027
## 95 balance loan -0.0843502457
## 96 loan balance -0.0843502457
## 97 education loan 0.0802788983
## 98 loan education 0.0802788983
## 99 default loan 0.0772342411
## 100 loan default 0.0772342411
## 101 campaign y -0.0731720063
## 102 campaign y_num -0.0731720063
## 103 y campaign -0.0731720063
## 104 y_num campaign -0.0731720063
## 105 education y 0.0726954758
## 106 y education 0.0726954758
## 107 marital month 0.0723165992
## 108 month marital 0.0723165992
## 109 balance housing -0.0687683157
## 110 housing balance -0.0687683157
## 111 loan y 0.0681850347
## 112 y loan 0.0681850347
## 113 loan y_num -0.0681850347
## 114 y_num loan -0.0681850347
## 115 default balance -0.0667450571
## 116 balance default -0.0667450571
## 117 marital y 0.0659256986
## 118 y marital 0.0659256986
## 119 job poutcome 0.0642142026
## 120 poutcome job 0.0642142026
## 121 default month 0.0586747101
## 122 month default 0.0586747101
## 123 day previous -0.0571417686
## 124 previous day -0.0571417686
## 125 loan poutcome 0.0552479823
## 126 poutcome loan 0.0552479823
## 127 balance y 0.0528384103
## 128 balance y_num 0.0528384103
## 129 y balance 0.0528384103
## 130 y_num balance 0.0528384103
## 131 marital loan 0.0519365814
## 132 loan marital 0.0519365814
## 133 campaign pdays 0.0505336900
## 134 pdays campaign 0.0505336900
## 135 marital contact 0.0450906590
## 136 contact marital 0.0450906590
## 137 housing previous 0.0419125713
## 138 previous housing 0.0419125713
## 139 housing loan 0.0413228660
## 140 loan housing 0.0413228660
## 141 default poutcome 0.0404031963
## 142 poutcome default 0.0404031963
## 143 campaign previous -0.0388805268
## 144 previous campaign -0.0388805268
## 145 job default 0.0365333681
## 146 default job 0.0365333681
## 147 education poutcome 0.0356516656
## 148 poutcome education 0.0356516656
## 149 default pdays 0.0337603184
## 150 pdays default 0.0337603184
## 151 pdays previous -0.0321324145
## 152 previous pdays -0.0321324145
## 153 day duration -0.0302063411
## 154 duration day -0.0302063411
## 155 marital poutcome 0.0290826813
## 156 poutcome marital 0.0290826813
## 157 day y -0.0283477767
## 158 day y_num -0.0283477767
## 159 y day -0.0283477767
## 160 y_num day -0.0283477767
## 161 housing day -0.0279816493
## 162 day housing -0.0279816493
## 163 age y 0.0251550171
## 164 age y_num 0.0251550171
## 165 y age 0.0251550171
## 166 y_num age 0.0251550171
## 167 duration pdays -0.0244065859
## 168 pdays duration -0.0244065859
## 169 default contact 0.0244057519
## 170 contact default 0.0244057519
## 171 housing campaign -0.0235987068
## 172 campaign housing -0.0235987068
## 173 loan pdays 0.0224539965
## 174 pdays loan 0.0224539965
## 175 default y 0.0224189659
## 176 y default 0.0224189659
## 177 default y_num -0.0224189659
## 178 y_num default -0.0224189659
## 179 default previous -0.0216973553
## 180 previous default -0.0216973553
## 181 balance duration 0.0215603805
## 182 duration balance 0.0215603805
## 183 balance previous 0.0209881426
## 184 previous balance 0.0209881426
## 185 marital housing 0.0206852270
## 186 housing marital 0.0206852270
## 187 marital default 0.0192303904
## 188 default marital 0.0192303904
## 189 age default -0.0178793036
## 190 default age -0.0178793036
## 191 default campaign 0.0168215314
## 192 campaign default 0.0168215314
## 193 loan contact 0.0162737669
## 194 contact loan 0.0162737669
## 195 education default 0.0158963802
## 196 default education 0.0158963802
## 197 age loan -0.0156552727
## 198 loan age -0.0156552727
## 199 balance campaign -0.0145782789
## 200 campaign balance -0.0145782789
## 201 loan duration -0.0124119718
## 202 duration loan -0.0124119718
## 203 loan previous -0.0119403526
## 204 previous loan -0.0119403526
## 205 loan day 0.0113701576
## 206 day loan 0.0113701576
## 207 default duration -0.0100214613
## 208 duration default -0.0100214613
## 209 loan campaign 0.0099798459
## 210 campaign loan 0.0099798459
## 211 default day 0.0094238991
## 212 day default 0.0094238991
## 213 age day -0.0091200456
## 214 day age -0.0091200456
## 215 default housing 0.0060252184
## 216 housing default 0.0060252184
## 217 housing duration 0.0050754494
## 218 duration housing 0.0050754494
## 219 age campaign 0.0047603118
## 220 campaign age 0.0047603118
## 221 age duration -0.0046484285
## 222 duration age -0.0046484285
## 223 balance day 0.0045025851
## 224 day balance 0.0045025851
## 225 age previous 0.0018364901
## 226 previous age 0.0018364901
## 227 duration previous 0.0003279149
## 228 previous duration 0.0003279149
data <- data |>
mutate(poutcome=na_if(poutcome, "unknown"))
data <- data |>
mutate(job=na_if(job, "unknown"))
data <- data |>
mutate(education=na_if(education, "unknown"))
data <- data |>
mutate(contact=na_if(contact, "unknown"))
library(lubridate)
data$campaign_date <- dmy(paste(data$day, data$month, "2008"))
data$campaign_weekday <- wday(data$campaign_date, label = TRUE)
data$campaign_quarter <- quarter(data$campaign_date)