library(tidyverse)
library(randomForest)
library(gmodels)
# Load the raw credit-card data. The path is relative to the working directory.
CreditCardData_1_ <- read_csv("Downloads/CreditCardData(1).csv")

# Fix the RNG seed so the train/validation split below is reproducible.
set.seed(123)

# Attach one uniform draw per row (used only for splitting) and turn the
# response into a factor so randomForest() fits a classification forest.
# n() sizes the draw to the data instead of hard-coding the row count.
CreditCardData_1_ <- CreditCardData_1_ %>%
  mutate(
    random = runif(n()),
    Attrition_Flag = as.factor(Attrition_Flag)
  )

# Roughly 70% of rows go to training; the helper column is dropped afterwards
# so it cannot leak into the model as a predictor.
train <- CreditCardData_1_ %>%
  filter(random < 0.7) %>%
  select(-random)

# The remaining rows (random >= 0.7) form the validation set.
val <- CreditCardData_1_ %>%
  filter(random >= 0.7) %>%
  select(-random)

# Fit the forest on every remaining column. Classification is selected because
# the response is a factor; importance = TRUE stores the permutation-based
# importance measures alongside the Gini-based one.
# NOTE(review): only Attrition_Flag is converted to a factor here; confirm how
# any character predictors in the CSV should be encoded before modelling.
rf <- randomForest(
  Attrition_Flag ~ .,
  data = train,
  importance = TRUE
)

# summary() on a randomForest object only tabulates the length/class/mode of
# its components; print(rf) reports the OOB error and confusion matrix.
summary(rf)
# Variable-importance plot: one horizontal point-range per feature, running
# from 0 to its MeanDecreaseAccuracy value, with features ordered by that value.
# The plot object is stored, not printed.
MeanDecreaseAccuracy <- ggplot(
  data = rownames_to_column(data.frame(rf$importance), var = "feature"),
  mapping = aes(
    x = fct_reorder(feature, MeanDecreaseAccuracy),
    y = MeanDecreaseAccuracy
  )
) +
  geom_pointrange(
    mapping = aes(ymin = 0, ymax = MeanDecreaseAccuracy),
    colour = "pink",
    size = 0.3
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Variable Importance Using Mean Decrease Accuracy",
    x = "",
    y = ""
  )
# Variable-importance plot: one horizontal point-range per feature, running
# from 0 to its MeanDecreaseGini value, with features ordered by that value.
# The plot object is stored, not printed.
MeanDecreaseGini <- ggplot(
  data = rownames_to_column(data.frame(rf$importance), var = "feature"),
  mapping = aes(
    x = fct_reorder(feature, MeanDecreaseGini),
    y = MeanDecreaseGini
  )
) +
  geom_pointrange(
    mapping = aes(ymin = 0, ymax = MeanDecreaseGini),
    colour = "pink",
    size = 0.3
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Variable Importance Using Mean Decrease Gini",
    x = "",
    y = ""
  )
# Score the validation set with the fitted forest and store the predicted
# class next to the observed Attrition_Flag. The stray `~` after `<-` is
# removed so the column holds predictions rather than an unevaluated formula.
# The two self-assignments that followed were no-ops and have been dropped.
val$Attrition_Flag_predicted <- predict(rf, newdata = val)