Code:
# Count how many rows of bank_data share each Total_Trans_Ct value.
table(bank_data[["Total_Trans_Ct"]])
##
## 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 4 2 4 5 9 16 13 13 23 11 19 33 35 34 50 57 56 82 73 75
## 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## 84 100 104 116 107 136 135 141 139 126 136 138 132 147 127 129 100 110 98 118
## 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
## 91 92 64 85 89 78 106 94 103 97 111 118 134 150 158 166 164 186 170 202
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## 193 203 168 183 190 203 198 197 190 184 173 208 202 169 147 148 133 137 114 93
## 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
## 83 62 66 55 51 40 44 42 31 38 38 25 30 31 31 32 31 14 21 22
## 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
## 25 22 24 23 23 25 32 21 22 16 31 22 18 15 28 12 10 12 10 6
## 130 131 132 134 138 139
## 5 6 1 1 1 1
# Bin Total_Trans_Ct into groups of width 20 (last group open-ended).
# cut() builds right-closed intervals by default, so without
# include.lowest = TRUE the lowest break (10) is excluded and rows with
# Total_Trans_Ct == 10 would get NA instead of landing in the first group.
bank_data <- mutate(
  bank_data,
  trans_ct_group = cut(
    Total_Trans_Ct,
    breaks = c(10, 30, 50, 70, 90, 110, 130, Inf),
    include.lowest = TRUE
  )
)
# Proportion bars: within each transaction-count group, the share of rows
# for every Attrition_Flag value (position = "fill" rescales each bar to 1).
ggplot(bank_data) +
  geom_bar(
    aes(x = trans_ct_group, fill = Attrition_Flag),
    alpha = 0.7,
    position = "fill"
  ) +
  labs(
    # Title spelling corrected ("Attirtion" -> "Attrition").
    title = "Total Transaction Count & Attrition",
    x = "Groups of transaction count",
    y = "Proportion"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = rel(1.8),
      color = "navyblue", margin = margin(10, 15, 10, 10)
    ),
    axis.title = element_text(
      hjust = 0.5, size = rel(1.5),
      color = "dodgerblue3"
    ),
    axis.title.x = element_text(margin = margin(15, 10, 10, 10)),
    axis.title.y = element_text(margin = margin(10, 10, 15, 10))
  )
Code:
# Attrition ratio for every Education_Level x Marital_Status combination,
# sorted from lowest to highest.
# The ratio is computed explicitly as the share of rows that are not
# "Existing Customer". Taking the first cell of prop.table(table(...))
# depends on level ordering and would report the existing-customer share
# for any group that happens to contain no attrited customers.
bank_data %>%
  group_by(Education_Level, Marital_Status) %>%
  summarise(
    Attrition_ratio = mean(Attrition_Flag != "Existing Customer", na.rm = TRUE)
  ) %>%
  arrange(Attrition_ratio)
## # A tibble: 28 × 3
## # Groups: Education_Level [7]
## Education_Level Marital_Status Attrition_ratio
## <chr> <chr> <dbl>
## 1 College Unknown 0.122
## 2 College Divorced 0.128
## 3 Unknown Unknown 0.132
## 4 Uneducated Married 0.142
## 5 Graduate Married 0.144
## 6 High School Married 0.144
## 7 Uneducated Divorced 0.147
## 8 High School Single 0.151
## 9 College Married 0.152
## 10 Post-Graduate Married 0.152
## # ℹ 18 more rows
# Convert Education_Level to a factor with an explicit level order.
# The levels are supplied directly: indexing the unnamed vector returned by
# unique() with character names matches nothing, yields only NA levels, and
# therefore turns every element of the factor into NA.
bank_edu <- factor(
  bank_data$Education_Level,
  levels = c(
    "College", "Doctorate", "Graduate", "High School",
    "Post-Graduate", "Uneducated", "Unknown"
  )
)
# Horizontal proportion bars of Marital_Status within each Education_Level,
# drawn in one panel per Attrition_Flag value (two rows of panels).
ggplot(bank_data, aes(y = Education_Level, fill = Marital_Status)) +
  geom_bar(position = "fill", alpha = 0.7) +
  facet_wrap(~Attrition_Flag, nrow = 2) +
  labs(
    title = "Edu Levels, Marital Status, & Attrition",
    x = "Proportion",
    y = "Levels of Education"
  ) +
  theme(
    plot.title = element_text(
      color = "purple4",
      size = rel(1.8),
      hjust = 0.5,
      margin = margin(10, 15, 10, 10)
    ),
    axis.title = element_text(
      color = "mediumpurple3",
      size = rel(1.5),
      hjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(15, 10, 10, 10)),
    axis.title.y = element_text(margin = margin(10, 10, 15, 10))
  )
Answer: According to the graph, married individuals
make up the largest proportion across all education levels, followed by
single and divorced individuals. While marital status may influence
attrition, education level does not show a strong correlation with
customer retention. The distribution remains relatively stable across
both attrition groups (attrited and existing customers), suggesting that other factors might play a more significant
role in customer attrition. Further statistical analysis would be needed
to confirm these observations.
Code:
# Pearson chi-squared test of independence between Dependent_count and
# Marital_Status.
chisq.test(
  x = bank_data$Dependent_count,
  y = bank_data$Marital_Status
)
##
## Pearson's Chi-squared test
##
## data: bank_data$Dependent_count and bank_data$Marital_Status
## X-squared = 56.253, df = 15, p-value = 1.098e-06
# Proportion bars of Marital_Status for each Dependent_count value, with an
# x-axis tick at every integer from 0 to 5.
ggplot(bank_data, aes(x = Dependent_count, fill = Marital_Status)) +
  geom_bar(position = "fill", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 5, 1)) +
  labs(
    title = "Relation Between Marital Status & Dependent",
    x = "Number of dependent",
    y = "Proportion"
  ) +
  theme(
    plot.title = element_text(
      color = "firebrick4",
      size = rel(1.8),
      hjust = 0.5,
      margin = margin(10, 15, 10, 10)
    ),
    axis.title = element_text(
      color = "firebrick",
      size = rel(1.6),
      hjust = 0.5
    ),
    axis.title.x = element_text(margin = margin(15, 10, 10, 10)),
    axis.title.y = element_text(margin = margin(10, 10, 15, 10)),
    axis.text = element_text(size = rel(1.1))
  )
Code:
# Split Avg_Utilization_Ratio by attrition status and compare the two group
# means with a two-sample t-test.
# att_rev1: rows whose Attrition_Flag is not "Existing Customer".
# att_rev2: rows whose Attrition_Flag is "Existing Customer".
att_rev1 <- with(
  bank_data,
  Avg_Utilization_Ratio[Attrition_Flag != "Existing Customer"]
)
att_rev2 <- with(
  bank_data,
  Avg_Utilization_Ratio[Attrition_Flag == "Existing Customer"]
)
t.test(x = att_rev1, y = att_rev2)
##
## Welch Two Sample t-test
##
## data: att_rev1 and att_rev2
## t = -18.623, df = 2336, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1480402 -0.1198331
## sample estimates:
## mean of x mean of y
## 0.1624751 0.2964118
# Proportion bars of Attrition_Flag for each Months_Inactive_12_mon value.
ggplot(bank_data, aes(x = Months_Inactive_12_mon, fill = Attrition_Flag)) +
  geom_bar(alpha = 0.7, position = "fill") +
  # A single x scale with ticks at 0..6. A second scale_x_continuous() with
  # breaks 0..5 used to follow labs(); it replaced this one and removed the
  # tick for month 6.
  scale_x_continuous(breaks = seq(0, 6, 1)) +
  labs(
    title = "Relation Between Inactive Month & Attrition",
    x = "Inactive months",
    y = "Proportion"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = rel(1.8), color = "cornflowerblue",
      margin = margin(10, 15, 10, 10)
    ),
    axis.title = element_text(
      hjust = 0.5, size = rel(1.6), color = "orange2"
    ),
    axis.title.x = element_text(margin = margin(15, 10, 10, 10)),
    axis.title.y = element_text(margin = margin(10, 10, 15, 10)),
    axis.text = element_text(size = rel(1.1))
  )
# Pearson chi-squared test of independence between Months_Inactive_12_mon
# and Attrition_Flag.
chisq.test(
  x = bank_data$Months_Inactive_12_mon,
  y = bank_data$Attrition_Flag
)
##
## Pearson's Chi-squared test
##
## data: bank_data$Months_Inactive_12_mon and bank_data$Attrition_Flag
## X-squared = 396.46, df = 6, p-value < 2.2e-16
# Scatter plot of Total_Amt_Chng_Q4_Q1 (x) against Total_Ct_Chng_Q4_Q1 (y),
# coloured by Attrition_Flag, with one smoothed trend line per colour group.
ggplot(
  bank_data,
  aes(x = Total_Amt_Chng_Q4_Q1, y = Total_Ct_Chng_Q4_Q1, color = Attrition_Flag)
) +
  geom_point(alpha = 0.3) +
  geom_smooth() +
  labs(
    title = "Total Transaction Amount & Count Ratio vs Attrition",
    x = "Total amount in 4th:1st quarter",
    # y maps Total_Ct_Chng_Q4_Q1, so the label names the count (it previously
    # repeated the x-axis "amount" label).
    y = "Total count in 4th:1st quarter"
  ) +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = rel(1.7), color = "sienna3",
      margin = margin(10, 15, 10, 10)
    ),
    axis.title = element_text(
      hjust = 0.5, size = rel(1.4), color = "olivedrab4"
    ),
    axis.title.x = element_text(margin = margin(15, 10, 10, 10)),
    axis.title.y = element_text(margin = margin(10, 10, 15, 10)),
    axis.text = element_text(size = rel(1.1))
  )
Answer:
1. Avg_Utilization_Ratio vs Attrition_Flag
2. Months_Inactive_12_mon vs Attrition_Flag
3. Total_Amt_Chng_Q4_Q1, Total_Ct_Chng_Q4_Q1 vs Attrition_Flag