library(tidyverse)
library(randomForest)
library(gmodels)
# Load the raw credit-card data set (path is relative to the working
# directory). NOTE: the assignments in this script previously read `<- ~expr`,
# which stores an unevaluated one-sided formula instead of running the call;
# the stray `~` has been removed throughout.
CreditCardData_1_ <- read_csv("Downloads/CreditCardData(1).csv")

# Fix the RNG state so the random train/validation split is reproducible.
set.seed(123)

# Tag every row with a uniform random number that drives the split below.
# `n()` is used instead of a hard-coded row count so the script keeps working
# if the file changes size. The response and any character columns are
# converted to factors before splitting so that randomForest treats them as
# categorical and both subsets share the same factor levels.
CreditCardData_1_ <- CreditCardData_1_ %>%
  mutate(
    random = runif(n()),
    Attrition_Flag = as.factor(Attrition_Flag),
    across(where(is.character), as.factor)
  )

# Rows whose random draw is below 0.7 form the training set (about 70%).
train <- CreditCardData_1_ %>%
  filter(random < 0.7) %>%
  select(-random)

# The remaining rows form the validation set.
val <- CreditCardData_1_ %>%
  filter(random >= 0.7) %>%
  select(-random)

# Fit a random forest predicting Attrition_Flag from all other columns.
# Classification is implied by the factor response; `importance = TRUE`
# additionally stores the permutation-based importance measures.
rf <- randomForest(
  Attrition_Flag ~ .,
  data = train,
  importance = TRUE
)

summary(rf)

# Build a horizontal point-range chart for one column of `model$importance`.
#   model:  a fitted randomForest object.
#   metric: name of the importance column to plot (a string).
#   title:  plot title.
# Returns the ggplot object; nothing is drawn until it is printed.
plot_importance <- function(model, metric, title) {
  model$importance %>%
    data.frame() %>%
    rownames_to_column(var = "feature") %>%
    ggplot(aes(
      x = fct_reorder(feature, .data[[metric]]),
      y = .data[[metric]]
    )) +
    geom_pointrange(
      aes(ymin = 0, ymax = .data[[metric]]),
      color = "pink",
      size = .3
    ) +
    theme_minimal() +
    coord_flip() +
    labs(x = "", y = "", title = title)
}

# Variable-importance plots, stored under the same names as before.
MeanDecreaseAccuracy <- plot_importance(
  rf,
  "MeanDecreaseAccuracy",
  "Variable Importance Using Mean Decrease Accuracy"
)
MeanDecreaseGini <- plot_importance(
  rf,
  "MeanDecreaseGini",
  "Variable Importance Using Mean Decrease Gini"
)

# Score the validation set with the fitted forest; the predicted class is
# stored next to the observed Attrition_Flag for later comparison.
val$Attrition_Flag_predicted <- predict(rf, val)