# Chi-squared test of independence between Gender and Income_Category,
# run on the two-way contingency table of counts built by table().
# NOTE: the "data:" label in the printed result is the deparsed argument
# expression, so the call text is kept exactly as written.
chisq.test(table(bank_data$Gender, bank_data$Income_Category))
##
## Pearson's Chi-squared test
##
## data: table(bank_data$Gender, bank_data$Income_Category)
## X-squared = 7138.4, df = 5, p-value < 2.2e-16
# Bar chart of Income_Category with bars filled by Gender.
# position = "fill" rescales every bar to height 1, so the y axis shows the
# within-category proportion rather than raw counts (hence the axis label).
ggplot(bank_data, aes(x = Income_Category, fill = Gender)) +
  geom_bar(position = "fill") +
  labs(y = "proportion")
# Chi-squared test of independence between Attrition_Flag and Gender,
# run on their two-way contingency table. The recorded output (df = 1)
# shows that Yates' continuity correction was applied to this table.
chisq.test(table(bank_data$Attrition_Flag, bank_data$Gender))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(bank_data$Attrition_Flag, bank_data$Gender)
## X-squared = 13.866, df = 1, p-value = 0.0001964
The chi-square test between Attrition_Flag and Gender yielded p ≈ 0.0002 (p-value = 0.0001964). Since p < 0.05, there is a statistically significant dependence between the two variables.
# Chi-squared test of independence between Attrition_Flag and
# Income_Category, run on their two-way contingency table of counts.
chisq.test(table(bank_data$Attrition_Flag, bank_data$Income_Category))
##
## Pearson's Chi-squared test
##
## data: table(bank_data$Attrition_Flag, bank_data$Income_Category)
## X-squared = 12.832, df = 5, p-value = 0.025
The chi-square test between Attrition_Flag and Income_Category yielded p = 0.025. Since p < 0.05, there is a statistically significant dependence between the two variables.
# Compare Total_Trans_Ct between the two Attrition_Flag groups.
# One logical mask marks the "Existing Customer" rows; its negation selects
# every other row, exactly as a direct `!=` comparison would.
is_existing_customer <- bank_data$Attrition_Flag == "Existing Customer"

# data1: customers that are NOT "Existing Customer"; data2: existing customers.
data1 <- bank_data$Total_Trans_Ct[!is_existing_customer]
data2 <- bank_data$Total_Trans_Ct[is_existing_customer]

# With default arguments t.test() runs the two-sided Welch (unequal-variance)
# two-sample test of equal means.
t.test(data1, data2)
##
## Welch Two Sample t-test
##
## data: data1 and data2
## t = -54.142, df = 3386.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -24.59864 -22.87930
## sample estimates:
## mean of x mean of y
## 44.93362 68.67259
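The Welch two-sample t-test comparing Total_Trans_Ct across the two Attrition_Flag groups yielded p < 0.05 (p-value < 2.2e-16), so the group means differ significantly (about 44.9 vs. 68.7); that is, there is significant dependence between Attrition_Flag and Total_Trans_Ct.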
# Compare Months_Inactive_12_mon between the two Attrition_Flag groups.
# One logical mask marks the "Existing Customer" rows; its negation selects
# every other row, exactly as a direct `!=` comparison would.
is_existing_customer <- bank_data$Attrition_Flag == "Existing Customer"

# data1: customers that are NOT "Existing Customer"; data2: existing customers.
data1 <- bank_data$Months_Inactive_12_mon[!is_existing_customer]
data2 <- bank_data$Months_Inactive_12_mon[is_existing_customer]

# With default arguments t.test() runs the two-sided Welch (unequal-variance)
# two-sample test of equal means.
t.test(data1, data2)
##
## Welch Two Sample t-test
##
## data: data1 and data2
## t = 16.862, df = 2489.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.3707468 0.4683249
## sample estimates:
## mean of x mean of y
## 2.693301 2.273765
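The Welch two-sample t-test comparing Months_Inactive_12_mon across the two Attrition_Flag groups yielded p < 0.05 (p-value < 2.2e-16), so the group means differ significantly (about 2.69 vs. 2.27); that is, there is significant dependence between Attrition_Flag and Months_Inactive_12_mon.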